From 2415e66f889f38503b73e8ebc5f43ca342390e5c Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 15 Apr 2024 18:49:24 +0200
Subject: Adding upstream version 2024.03.10.

Signed-off-by: Daniel Baumann
---
 .editorconfig | 8 +
 .gitattributes | 6 +
 .github/FUNDING.yml | 13 +
 .github/ISSUE_TEMPLATE/1_broken_site.yml | 79 +
 .github/ISSUE_TEMPLATE/2_site_support_request.yml | 91 +
 .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 87 +
 .github/ISSUE_TEMPLATE/4_bug_report.yml | 72 +
 .github/ISSUE_TEMPLATE/5_feature_request.yml | 66 +
 .github/ISSUE_TEMPLATE/6_question.yml | 72 +
 .github/ISSUE_TEMPLATE/config.yml | 8 +
 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 40 +
 .../ISSUE_TEMPLATE_tmpl/2_site_support_request.yml | 52 +
 .../ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml | 48 +
 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 33 +
 .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml | 31 +
 .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 37 +
 .github/PULL_REQUEST_TEMPLATE.md | 43 +
 .github/banner.svg | 31 +
 .github/workflows/build.yml | 487 ++
 .github/workflows/codeql.yml | 65 +
 .github/workflows/core.yml | 61 +
 .github/workflows/download.yml | 48 +
 .github/workflows/quick-test.yml | 35 +
 .github/workflows/release-master.yml | 29 +
 .github/workflows/release-nightly.yml | 42 +
 .github/workflows/release.yml | 387 +
 .gitignore | 128 +
 CONTRIBUTING.md | 731 ++
 CONTRIBUTORS | 602 ++
 Changelog.md | 4280 ++++++++++++
 Collaborators.md | 63 +
 LICENSE | 24 +
 Makefile | 160 +
 README.md | 2317 ++++++
 bundle/__init__.py | 0
 bundle/py2exe.py | 59 +
 bundle/pyinstaller.py | 132 +
 devscripts/__init__.py | 0
 devscripts/bash-completion.in | 29 +
 devscripts/bash-completion.py | 31 +
 devscripts/changelog_override.json | 130 +
 devscripts/changelog_override.schema.json | 96 +
 devscripts/check-porn.py | 61 +
 devscripts/cli_to_api.py | 48 +
 devscripts/fish-completion.in | 5 +
 devscripts/fish-completion.py | 52 +
 devscripts/generate_aes_testdata.py | 46 +
 devscripts/install_deps.py | 73 +
 devscripts/lazy_load_template.py | 39 +
 devscripts/logo.ico | Bin 0 -> 41043 bytes
 devscripts/make_changelog.py | 503 ++
 devscripts/make_contributing.py | 32 +
 devscripts/make_issue_template.py | 72 +
 devscripts/make_lazy_extractors.py | 132 +
 devscripts/make_readme.py | 93 +
 devscripts/make_supportedsites.py | 20 +
 devscripts/prepare_manpage.py | 97 +
 devscripts/run_tests.bat | 4 +
 devscripts/run_tests.py | 71 +
 devscripts/run_tests.sh | 4 +
 devscripts/set-variant.py | 36 +
 devscripts/tomlparse.py | 189 +
 devscripts/update-version.py | 82 +
 devscripts/utils.py | 47 +
 devscripts/zsh-completion.in | 30 +
 devscripts/zsh-completion.py | 50 +
 public.key | 29 +
 pyinst.py | 17 +
 pyproject.toml | 120 +
 setup.cfg | 45 +
 setup.py | 36 +
 supportedsites.md | 1794 +++++
 test/__init__.py | 0
 test/conftest.py | 26 +
 test/helper.py | 340 +
 test/parameters.json | 49 +
 test/test_InfoExtractor.py | 1911 +++++
 test/test_YoutubeDL.py | 1346 ++++
 test/test_YoutubeDLCookieJar.py | 66 +
 test/test_aes.py | 152 +
 test/test_age_restriction.py | 55 +
 test/test_all_urls.py | 122 +
 test/test_cache.py | 57 +
 test/test_compat.py | 105 +
 test/test_config.py | 227 +
 test/test_cookies.py | 306 +
 test/test_download.py | 314 +
 test/test_downloader_external.py | 139 +
 test/test_downloader_http.py | 106 +
 test/test_execution.py | 60 +
 test/test_iqiyi_sdk_interpreter.py | 44 +
 test/test_jsinterp.py | 380 +
 test/test_netrc.py | 28 +
 test/test_networking.py | 1631 +++++
 test/test_networking_utils.py | 208 +
 test/test_overwrites.py | 54 +
 test/test_plugins.py | 73 +
 test/test_post_hooks.py | 70 +
 test/test_postprocessors.py | 579 ++
 test/test_socks.py | 477 ++
 test/test_subtitles.py | 452 ++
 test/test_update.py | 228 +
 test/test_utils.py | 2457 +++++++
 test/test_verbose_output.py | 75 +
 test/test_websockets.py | 383 +
 test/test_write_annotations.py.disabled | 77 +
 test/test_youtube_lists.py | 71 +
 test/test_youtube_misc.py | 26 +
 test/test_youtube_signature.py | 253 +
 test/testcert.pem | 52 +
 test/testdata/certificate/ca.crt | 10 +
 test/testdata/certificate/ca.key | 5 +
 test/testdata/certificate/ca.srl | 1 +
 test/testdata/certificate/client.crt | 9 +
 test/testdata/certificate/client.csr | 7 +
 test/testdata/certificate/client.key | 5 +
 test/testdata/certificate/clientencrypted.key | 8 +
 .../certificate/clientwithencryptedkey.crt | 17 +
 test/testdata/certificate/clientwithkey.crt | 14 +
 test/testdata/certificate/instructions.md | 19 +
 test/testdata/cookies/httponly_cookies.txt | 6 +
 test/testdata/cookies/malformed_cookies.txt | 9 +
 test/testdata/cookies/session_cookies.txt | 6 +
 test/testdata/f4m/custom_base_url.f4m | 10 +
 test/testdata/ism/ec-3_test.Manifest | 1 +
 test/testdata/ism/sintel.Manifest | 988 +++
 test/testdata/m3u8/bipbop_16x9.m3u8 | 38 +
 .../testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 | 76 +
 test/testdata/mpd/float_duration.mpd | 18 +
 test/testdata/mpd/subtitles.mpd | 351 +
 test/testdata/mpd/unfragmented.mpd | 28 +
 test/testdata/mpd/urls_only.mpd | 218 +
 test/testdata/thumbnails/foo %d bar/foo_%d.webp | Bin 0 -> 3928 bytes
 test/testdata/xspf/foo_xspf.xspf | 34 +
 test/testdata/yt_dlp_plugins/extractor/_ignore.py | 5 +
 test/testdata/yt_dlp_plugins/extractor/ignore.py | 12 +
 test/testdata/yt_dlp_plugins/extractor/normal.py | 9 +
 .../yt_dlp_plugins/postprocessor/normal.py | 5 +
 .../yt_dlp_plugins/extractor/zipped.py | 5 +
 .../yt_dlp_plugins/postprocessor/zipped.py | 5 +
 yt-dlp.cmd | 1 +
 yt-dlp.sh | 2 +
 yt_dlp/YoutubeDL.py | 4339 ++++++++++++
 yt_dlp/__init__.py | 1054 +++
 yt_dlp/__main__.py | 17 +
 yt_dlp/__pyinstaller/__init__.py | 5 +
 yt_dlp/__pyinstaller/hook-yt_dlp.py | 34 +
 yt_dlp/aes.py | 567 ++
 yt_dlp/cache.py | 91 +
 yt_dlp/compat/__init__.py | 79 +
 yt_dlp/compat/_deprecated.py | 23 +
 yt_dlp/compat/_legacy.py | 108 +
 yt_dlp/compat/compat_utils.py | 83 +
 yt_dlp/compat/functools.py | 12 +
 yt_dlp/compat/imghdr.py | 16 +
 yt_dlp/compat/shutil.py | 30 +
 yt_dlp/compat/types.py | 13 +
 yt_dlp/compat/urllib/__init__.py | 10 +
 yt_dlp/compat/urllib/request.py | 40 +
 yt_dlp/cookies.py | 1346 ++++
 yt_dlp/dependencies/Cryptodome.py | 38 +
 yt_dlp/dependencies/__init__.py | 92 +
 yt_dlp/downloader/__init__.py | 131 +
 yt_dlp/downloader/common.py | 486 ++
 yt_dlp/downloader/dash.py | 90 +
 yt_dlp/downloader/external.py | 664 ++
 yt_dlp/downloader/f4m.py | 427 ++
 yt_dlp/downloader/fc2.py | 46 +
 yt_dlp/downloader/fragment.py | 527 ++
 yt_dlp/downloader/hls.py | 378 +
 yt_dlp/downloader/http.py | 383 +
 yt_dlp/downloader/ism.py | 283 +
 yt_dlp/downloader/mhtml.py | 189 +
 yt_dlp/downloader/niconico.py | 140 +
 yt_dlp/downloader/rtmp.py | 213 +
 yt_dlp/downloader/rtsp.py | 42 +
 yt_dlp/downloader/websocket.py | 53 +
 yt_dlp/downloader/youtube_live_chat.py | 228 +
 yt_dlp/extractor/__init__.py | 42 +
 yt_dlp/extractor/_extractors.py | 2493 +++++++
 yt_dlp/extractor/abc.py | 421 ++
 yt_dlp/extractor/abcnews.py | 153 +
 yt_dlp/extractor/abcotvs.py | 130 +
 yt_dlp/extractor/abematv.py | 484 ++
 yt_dlp/extractor/academicearth.py | 39 +
 yt_dlp/extractor/acast.py | 143 +
 yt_dlp/extractor/acfun.py | 200 +
 yt_dlp/extractor/adn.py | 335 +
 yt_dlp/extractor/adobeconnect.py | 34 +
 yt_dlp/extractor/adobepass.py | 1778 +++
 yt_dlp/extractor/adobetv.py | 286 +
 yt_dlp/extractor/adultswim.py | 198 +
 yt_dlp/extractor/aenetworks.py | 369 +
 yt_dlp/extractor/aeonco.py | 74 +
 yt_dlp/extractor/afreecatv.py | 484 ++
 yt_dlp/extractor/agora.py | 251 +
 yt_dlp/extractor/airtv.py | 96 +
 yt_dlp/extractor/aitube.py | 60 +
 yt_dlp/extractor/aliexpress.py | 50 +
 yt_dlp/extractor/aljazeera.py | 83 +
 yt_dlp/extractor/allocine.py | 125 +
 yt_dlp/extractor/allstar.py | 253 +
 yt_dlp/extractor/alphaporno.py | 75 +
 yt_dlp/extractor/alsace20tv.py | 83 +
 yt_dlp/extractor/altcensored.py | 104 +
 yt_dlp/extractor/alura.py | 167 +
 yt_dlp/extractor/amadeustv.py | 77 +
 yt_dlp/extractor/amara.py | 100 +
 yt_dlp/extractor/amazon.py | 170 +
 yt_dlp/extractor/amazonminitv.py | 294 +
 yt_dlp/extractor/amcnetworks.py | 147 +
 yt_dlp/extractor/americastestkitchen.py | 215 +
 yt_dlp/extractor/amp.py | 101 +
 yt_dlp/extractor/anchorfm.py | 98 +
 yt_dlp/extractor/angel.py | 56 +
 yt_dlp/extractor/antenna.py | 143 +
 yt_dlp/extractor/anvato.py | 404 ++
 yt_dlp/extractor/aol.py | 133 +
 yt_dlp/extractor/apa.py | 82 +
 yt_dlp/extractor/aparat.py | 88 +
 yt_dlp/extractor/appleconnect.py | 50 +
 yt_dlp/extractor/applepodcasts.py | 85 +
 yt_dlp/extractor/appletrailers.py | 278 +
 yt_dlp/extractor/archiveorg.py | 947 +++
 yt_dlp/extractor/arcpublishing.py | 164 +
 yt_dlp/extractor/ard.py | 579 ++
 yt_dlp/extractor/arkena.py | 150 +
 yt_dlp/extractor/arnes.py | 98 +
 yt_dlp/extractor/art19.py | 303 +
 yt_dlp/extractor/arte.py | 345 +
 yt_dlp/extractor/asobichannel.py | 168 +
 yt_dlp/extractor/atresplayer.py | 104 +
 yt_dlp/extractor/atscaleconf.py | 34 +
 yt_dlp/extractor/atvat.py | 108 +
 yt_dlp/extractor/audimedia.py | 89 +
 yt_dlp/extractor/audioboom.py | 57 +
 yt_dlp/extractor/audiodraft.py | 93 +
 yt_dlp/extractor/audiomack.py | 147 +
 yt_dlp/extractor/audius.py | 271 +
 yt_dlp/extractor/awaan.py | 184 +
 yt_dlp/extractor/aws.py | 75 +
 yt_dlp/extractor/axs.py | 89 +
 yt_dlp/extractor/azmedien.py | 66 +
 yt_dlp/extractor/baidu.py | 51 +
 yt_dlp/extractor/banbye.py | 168 +
 yt_dlp/extractor/bandaichannel.py | 33 +
 yt_dlp/extractor/bandcamp.py | 485 ++
 yt_dlp/extractor/bannedvideo.py | 155 +
 yt_dlp/extractor/bbc.py | 1660 +++++
 yt_dlp/extractor/beatbump.py | 111 +
 yt_dlp/extractor/beatport.py | 97 +
 yt_dlp/extractor/beeg.py | 90 +
 yt_dlp/extractor/behindkink.py | 42 +
 yt_dlp/extractor/bellmedia.py | 91 +
 yt_dlp/extractor/berufetv.py | 70 +
 yt_dlp/extractor/bet.py | 79 +
 yt_dlp/extractor/bfi.py | 35 +
 yt_dlp/extractor/bfmtv.py | 119 +
 yt_dlp/extractor/bibeltv.py | 197 +
 yt_dlp/extractor/bigflix.py | 73 +
 yt_dlp/extractor/bigo.py | 57 +
 yt_dlp/extractor/bild.py | 63 +
 yt_dlp/extractor/bilibili.py | 2233 ++++++
 yt_dlp/extractor/biobiochiletv.py | 83 +
 yt_dlp/extractor/bitchute.py | 275 +
 yt_dlp/extractor/blackboardcollaborate.py | 63 +
 yt_dlp/extractor/bleacherreport.py | 110 +
 yt_dlp/extractor/blerp.py | 167 +
 yt_dlp/extractor/blogger.py | 45 +
 yt_dlp/extractor/bloomberg.py | 77 +
 yt_dlp/extractor/bokecc.py | 53 +
 yt_dlp/extractor/bongacams.py | 70 +
 yt_dlp/extractor/boosty.py | 209 +
 yt_dlp/extractor/bostonglobe.py | 69 +
 yt_dlp/extractor/box.py | 83 +
 yt_dlp/extractor/boxcast.py | 102 +
 yt_dlp/extractor/bpb.py | 170 +
 yt_dlp/extractor/br.py | 166 +
 yt_dlp/extractor/brainpop.py | 318 +
 yt_dlp/extractor/bravotv.py | 189 +
 yt_dlp/extractor/breitbart.py | 34 +
 yt_dlp/extractor/brightcove.py | 952 +++
 yt_dlp/extractor/brilliantpala.py | 127 +
 yt_dlp/extractor/bundesliga.py | 34 +
 yt_dlp/extractor/bundestag.py | 123 +
 yt_dlp/extractor/businessinsider.py | 45 +
 yt_dlp/extractor/buzzfeed.py | 95 +
 yt_dlp/extractor/byutv.py | 104 +
 yt_dlp/extractor/c56.py | 59 +
 yt_dlp/extractor/cableav.py | 32 +
 yt_dlp/extractor/callin.py | 155 +
 yt_dlp/extractor/caltrans.py | 37 +
 yt_dlp/extractor/cam4.py | 31 +
 yt_dlp/extractor/camdemy.py | 158 +
 yt_dlp/extractor/camfm.py | 85 +
 yt_dlp/extractor/cammodels.py | 77 +
 yt_dlp/extractor/camsoda.py | 57 +
 yt_dlp/extractor/camtasia.py | 71 +
 yt_dlp/extractor/canal1.py | 39 +
 yt_dlp/extractor/canalalpha.py | 94 +
 yt_dlp/extractor/canalc2.py | 68 +
 yt_dlp/extractor/canalplus.py | 110 +
 yt_dlp/extractor/caracoltv.py | 136 +
 yt_dlp/extractor/cartoonnetwork.py | 59 +
 yt_dlp/extractor/cbc.py | 653 ++
 yt_dlp/extractor/cbs.py | 280 +
 yt_dlp/extractor/cbsnews.py | 443 ++
 yt_dlp/extractor/cbssports.py | 111 +
 yt_dlp/extractor/ccc.py | 115 +
 yt_dlp/extractor/ccma.py | 147 +
 yt_dlp/extractor/cctv.py | 201 +
 yt_dlp/extractor/cda.py | 338 +
 yt_dlp/extractor/cellebrite.py | 63 +
 yt_dlp/extractor/ceskatelevize.py | 289 +
 yt_dlp/extractor/cgtn.py | 65 +
 yt_dlp/extractor/charlierose.py | 50 +
 yt_dlp/extractor/chaturbate.py | 106 +
 yt_dlp/extractor/chilloutzone.py | 123 +
 yt_dlp/extractor/chzzk.py | 139 +
 yt_dlp/extractor/cinemax.py | 25 +
 yt_dlp/extractor/cinetecamilano.py | 61 +
 yt_dlp/extractor/cineverse.py | 139 +
 yt_dlp/extractor/ciscolive.py | 145 +
 yt_dlp/extractor/ciscowebex.py | 106 +
 yt_dlp/extractor/cjsw.py | 67 +
 yt_dlp/extractor/clipchamp.py | 61 +
 yt_dlp/extractor/clippit.py | 70 +
 yt_dlp/extractor/cliprs.py | 31 +
 yt_dlp/extractor/closertotruth.py | 89 +
 yt_dlp/extractor/cloudflarestream.py | 76 +
 yt_dlp/extractor/cloudycdn.py | 79 +
 yt_dlp/extractor/clubic.py | 53 +
 yt_dlp/extractor/clyp.py | 99 +
 yt_dlp/extractor/cmt.py | 55 +
 yt_dlp/extractor/cnbc.py | 97 +
 yt_dlp/extractor/cnn.py | 198 +
 yt_dlp/extractor/comedycentral.py | 55 +
 yt_dlp/extractor/common.py | 3943 +++++++++++
 yt_dlp/extractor/commonmistakes.py | 42 +
 yt_dlp/extractor/commonprotocols.py | 70 +
 yt_dlp/extractor/condenast.py | 250 +
 yt_dlp/extractor/contv.py | 113 +
 yt_dlp/extractor/corus.py | 154 +
 yt_dlp/extractor/coub.py | 136 +
 yt_dlp/extractor/cozytv.py | 37 +
 yt_dlp/extractor/cpac.py | 136 +
 yt_dlp/extractor/cracked.py | 88 +
 yt_dlp/extractor/crackle.py | 243 +
 yt_dlp/extractor/craftsy.py | 75 +
 yt_dlp/extractor/crooksandliars.py | 56 +
 yt_dlp/extractor/crowdbunker.py | 109 +
 yt_dlp/extractor/crtvg.py | 53 +
 yt_dlp/extractor/crunchyroll.py | 650 ++
 yt_dlp/extractor/cspan.py | 286 +
 yt_dlp/extractor/ctsnews.py | 84 +
 yt_dlp/extractor/ctv.py | 49 +
 yt_dlp/extractor/ctvnews.py | 70 +
 yt_dlp/extractor/cultureunplugged.py | 65 +
 yt_dlp/extractor/curiositystream.py | 203 +
 yt_dlp/extractor/cwtv.py | 99 +
 yt_dlp/extractor/cybrary.py | 144 +
 yt_dlp/extractor/dacast.py | 158 +
 yt_dlp/extractor/dailymail.py | 73 +
 yt_dlp/extractor/dailymotion.py | 474 ++
 yt_dlp/extractor/dailywire.py | 113 +
 yt_dlp/extractor/damtomo.py | 108 +
 yt_dlp/extractor/daum.py | 258 +
 yt_dlp/extractor/daystar.py | 47 +
 yt_dlp/extractor/dbtv.py | 47 +
 yt_dlp/extractor/dctp.py | 102 +
 yt_dlp/extractor/deezer.py | 142 +
 yt_dlp/extractor/democracynow.py | 91 +
 yt_dlp/extractor/detik.py | 159 +
 yt_dlp/extractor/deuxm.py | 76 +
 yt_dlp/extractor/dfb.py | 52 +
 yt_dlp/extractor/dhm.py | 58 +
 yt_dlp/extractor/digitalconcerthall.py | 150 +
 yt_dlp/extractor/digiteka.py | 98 +
 yt_dlp/extractor/discogs.py | 35 +
 yt_dlp/extractor/discovery.py | 115 +
 yt_dlp/extractor/discoverygo.py | 172 +
 yt_dlp/extractor/disney.py | 160 +
 yt_dlp/extractor/dispeak.py | 127 +
 yt_dlp/extractor/dlf.py | 192 +
 yt_dlp/extractor/dlive.py | 92 +
 yt_dlp/extractor/douyutv.py | 306 +
 yt_dlp/extractor/dplay.py | 1059 +++
 yt_dlp/extractor/drbonanza.py | 54 +
 yt_dlp/extractor/dreisat.py | 41 +
 yt_dlp/extractor/drooble.py | 113 +
 yt_dlp/extractor/dropbox.py | 90 +
 yt_dlp/extractor/dropout.py | 224 +
 yt_dlp/extractor/drtuber.py | 104 +
 yt_dlp/extractor/drtv.py | 401 ++
 yt_dlp/extractor/dtube.py | 80 +
 yt_dlp/extractor/duboku.py | 247 +
 yt_dlp/extractor/dumpert.py | 114 +
 yt_dlp/extractor/duoplay.py | 104 +
 yt_dlp/extractor/dvtv.py | 177 +
 yt_dlp/extractor/dw.py | 110 +
 yt_dlp/extractor/eagleplatform.py | 215 +
 yt_dlp/extractor/ebaumsworld.py | 31 +
 yt_dlp/extractor/ebay.py | 36 +
 yt_dlp/extractor/egghead.py | 134 +
 yt_dlp/extractor/eighttracks.py | 161 +
 yt_dlp/extractor/einthusan.py | 105 +
 yt_dlp/extractor/eitb.py | 79 +
 yt_dlp/extractor/elementorembed.py | 72 +
 yt_dlp/extractor/elonet.py | 64 +
 yt_dlp/extractor/elpais.py | 92 +
 yt_dlp/extractor/eltrecetv.py | 62 +
 yt_dlp/extractor/embedly.py | 109 +
 yt_dlp/extractor/epicon.py | 115 +
 yt_dlp/extractor/epidemicsound.py | 107 +
 yt_dlp/extractor/eplus.py | 183 +
 yt_dlp/extractor/epoch.py | 55 +
 yt_dlp/extractor/eporner.py | 137 +
 yt_dlp/extractor/erocast.py | 63 +
 yt_dlp/extractor/eroprofile.py | 122 +
 yt_dlp/extractor/err.py | 224 +
 yt_dlp/extractor/ertgr.py | 302 +
 yt_dlp/extractor/espn.py | 421 ++
 yt_dlp/extractor/ettutv.py | 60 +
 yt_dlp/extractor/europa.py | 174 +
 yt_dlp/extractor/europeantour.py | 34 +
 yt_dlp/extractor/eurosport.py | 123 +
 yt_dlp/extractor/euscreen.py | 60 +
 yt_dlp/extractor/expressen.py | 96 +
 yt_dlp/extractor/extractors.py | 28 +
 yt_dlp/extractor/eyedotv.py | 61 +
 yt_dlp/extractor/facebook.py | 1060 +++
 yt_dlp/extractor/fancode.py | 181 +
 yt_dlp/extractor/faz.py | 89 +
 yt_dlp/extractor/fc2.py | 280 +
 yt_dlp/extractor/fczenit.py | 51 +
 yt_dlp/extractor/fifa.py | 83 +
 yt_dlp/extractor/filmon.py | 171 +
 yt_dlp/extractor/filmweb.py | 38 +
 yt_dlp/extractor/firsttv.py | 152 +
 yt_dlp/extractor/fivetv.py | 85 +
 yt_dlp/extractor/flextv.py | 62 +
 yt_dlp/extractor/flickr.py | 114 +
 yt_dlp/extractor/floatplane.py | 333 +
 yt_dlp/extractor/folketinget.py | 73 +
 yt_dlp/extractor/footyroom.py | 53 +
 yt_dlp/extractor/formula1.py | 24 +
 yt_dlp/extractor/fourtube.py | 306 +
 yt_dlp/extractor/fox.py | 177 +
 yt_dlp/extractor/fox9.py | 38 +
 yt_dlp/extractor/foxnews.py | 185 +
 yt_dlp/extractor/foxsports.py | 52 +
 yt_dlp/extractor/fptplay.py | 117 +
 yt_dlp/extractor/franceinter.py | 56 +
 yt_dlp/extractor/francetv.py | 423 ++
 yt_dlp/extractor/freesound.py | 77 +
 yt_dlp/extractor/freespeech.py | 29 +
 yt_dlp/extractor/freetv.py | 139 +
 yt_dlp/extractor/frontendmasters.py | 252 +
 yt_dlp/extractor/fujitv.py | 71 +
 yt_dlp/extractor/funimation.py | 349 +
 yt_dlp/extractor/funk.py | 40 +
 yt_dlp/extractor/funker530.py | 80 +
 yt_dlp/extractor/fuyintv.py | 30 +
 yt_dlp/extractor/gab.py | 140 +
 yt_dlp/extractor/gaia.py | 122 +
 yt_dlp/extractor/gamejolt.py | 537 ++
 yt_dlp/extractor/gamespot.py | 75 +
 yt_dlp/extractor/gamestar.py | 60 +
 yt_dlp/extractor/gaskrank.py | 96 +
 yt_dlp/extractor/gazeta.py | 44 +
 yt_dlp/extractor/gdcvault.py | 214 +
 yt_dlp/extractor/gedidigital.py | 198 +
 yt_dlp/extractor/generic.py | 2849 ++++
 yt_dlp/extractor/genericembeds.py | 114 +
 yt_dlp/extractor/genius.py | 145 +
 yt_dlp/extractor/getcourseru.py | 178 +
 yt_dlp/extractor/gettr.py | 206 +
 yt_dlp/extractor/giantbomb.py | 85 +
 yt_dlp/extractor/gigya.py | 20 +
 yt_dlp/extractor/glide.py | 38 +
 yt_dlp/extractor/globalplayer.py | 254 +
 yt_dlp/extractor/globo.py | 246 +
 yt_dlp/extractor/glomex.py | 216 +
 yt_dlp/extractor/gmanetwork.py | 83 +
 yt_dlp/extractor/go.py | 333 +
 yt_dlp/extractor/godtube.py | 55 +
 yt_dlp/extractor/gofile.py | 106 +
 yt_dlp/extractor/golem.py | 68 +
 yt_dlp/extractor/goodgame.py | 57 +
 yt_dlp/extractor/googledrive.py | 341 +
 yt_dlp/extractor/googlepodcasts.py | 84 +
 yt_dlp/extractor/googlesearch.py | 38 +
 yt_dlp/extractor/goplay.py | 433 ++
 yt_dlp/extractor/gopro.py | 105 +
 yt_dlp/extractor/goshgay.py | 48 +
 yt_dlp/extractor/gotostage.py | 70 +
 yt_dlp/extractor/gputechconf.py | 32 +
 yt_dlp/extractor/gronkh.py | 120 +
 yt_dlp/extractor/groupon.py | 64 +
 yt_dlp/extractor/harpodeon.py | 70 +
 yt_dlp/extractor/hbo.py | 171 +
 yt_dlp/extractor/hearthisat.py | 96 +
 yt_dlp/extractor/heise.py | 207 +
 yt_dlp/extractor/hellporno.py | 72 +
 yt_dlp/extractor/hgtv.py | 37 +
 yt_dlp/extractor/hidive.py | 119 +
 yt_dlp/extractor/historicfilms.py | 45 +
 yt_dlp/extractor/hitrecord.py | 66 +
 yt_dlp/extractor/hketv.py | 187 +
 yt_dlp/extractor/hollywoodreporter.py | 72 +
 yt_dlp/extractor/holodex.py | 100 +
 yt_dlp/extractor/hotnewhiphop.py | 61 +
 yt_dlp/extractor/hotstar.py | 468 ++
 yt_dlp/extractor/hrefli.py | 15 +
 yt_dlp/extractor/hrfensehen.py | 90 +
 yt_dlp/extractor/hrti.py | 200 +
 yt_dlp/extractor/hse.py | 93 +
 yt_dlp/extractor/huajiao.py | 53 +
 yt_dlp/extractor/huffpost.py | 90 +
 yt_dlp/extractor/hungama.py | 201 +
 yt_dlp/extractor/huya.py | 134 +
 yt_dlp/extractor/hypem.py | 47 +
 yt_dlp/extractor/hypergryph.py | 32 +
 yt_dlp/extractor/hytale.py | 58 +
 yt_dlp/extractor/icareus.py | 179 +
 yt_dlp/extractor/ichinanalive.py | 160 +
 yt_dlp/extractor/idolplus.py | 115 +
 yt_dlp/extractor/ign.py | 399 ++
 yt_dlp/extractor/iheart.py | 94 +
 yt_dlp/extractor/ilpost.py | 69 +
 yt_dlp/extractor/iltalehti.py | 51 +
 yt_dlp/extractor/imdb.py | 144 +
 yt_dlp/extractor/imggaming.py | 126 +
 yt_dlp/extractor/imgur.py | 366 +
 yt_dlp/extractor/ina.py | 84 +
 yt_dlp/extractor/inc.py | 57 +
 yt_dlp/extractor/indavideo.py | 115 +
 yt_dlp/extractor/infoq.py | 136 +
 yt_dlp/extractor/instagram.py | 735 ++
 yt_dlp/extractor/internazionale.py | 75 +
 yt_dlp/extractor/internetvideoarchive.py | 58 +
 yt_dlp/extractor/iprima.py | 280 +
 yt_dlp/extractor/iqiyi.py | 766 ++
 yt_dlp/extractor/islamchannel.py | 81 +
 yt_dlp/extractor/israelnationalnews.py | 50 +
 yt_dlp/extractor/itprotv.py | 139 +
 yt_dlp/extractor/itv.py | 266 +
 yt_dlp/extractor/ivi.py | 253 +
 yt_dlp/extractor/ivideon.py | 77 +
 yt_dlp/extractor/iwara.py | 298 +
 yt_dlp/extractor/ixigua.py | 83 +
 yt_dlp/extractor/izlesene.py | 113 +
 yt_dlp/extractor/jable.py | 103 +
 yt_dlp/extractor/jamendo.py | 210 +
 yt_dlp/extractor/japandiet.py | 274 +
 yt_dlp/extractor/jeuxvideo.py | 52 +
 yt_dlp/extractor/jiosaavn.py | 105 +
 yt_dlp/extractor/jixie.py | 47 +
 yt_dlp/extractor/joj.py | 108 +
 yt_dlp/extractor/joqrag.py | 112 +
 yt_dlp/extractor/jove.py | 76 +
 yt_dlp/extractor/jstream.py | 73 +
 yt_dlp/extractor/jtbc.py | 156 +
 yt_dlp/extractor/jwplatform.py | 90 +
 yt_dlp/extractor/kakao.py | 152 +
 yt_dlp/extractor/kaltura.py | 545 ++
 yt_dlp/extractor/kankanews.py | 49 +
 yt_dlp/extractor/karaoketv.py | 61 +
 yt_dlp/extractor/kelbyone.py | 81 +
 yt_dlp/extractor/khanacademy.py | 110 +
 yt_dlp/extractor/kick.py | 126 +
 yt_dlp/extractor/kicker.py | 55 +
 yt_dlp/extractor/kickstarter.py | 68 +
 yt_dlp/extractor/kinja.py | 199 +
 yt_dlp/extractor/kinopoisk.py | 63 +
 yt_dlp/extractor/kommunetv.py | 31 +
 yt_dlp/extractor/kompas.py | 26 +
 yt_dlp/extractor/koo.py | 114 +
 yt_dlp/extractor/krasview.py | 58 +
 yt_dlp/extractor/kth.py | 28 +
 yt_dlp/extractor/ku6.py | 30 +
 yt_dlp/extractor/kukululive.py | 140 +
 yt_dlp/extractor/kuwo.py | 352 +
 yt_dlp/extractor/la7.py | 234 +
 yt_dlp/extractor/lastfm.py | 129 +
 yt_dlp/extractor/laxarxames.py | 73 +
 yt_dlp/extractor/lbry.py | 429 ++
 yt_dlp/extractor/lci.py | 28 +
 yt_dlp/extractor/lcp.py | 87 +
 yt_dlp/extractor/lecture2go.py | 67 +
 yt_dlp/extractor/lecturio.py | 235 +
 yt_dlp/extractor/leeco.py | 364 +
 yt_dlp/extractor/lefigaro.py | 136 +
 yt_dlp/extractor/lego.py | 141 +
 yt_dlp/extractor/lemonde.py | 56 +
 yt_dlp/extractor/lenta.py | 51 +
 yt_dlp/extractor/libraryofcongress.py | 148 +
 yt_dlp/extractor/libsyn.py | 89 +
 yt_dlp/extractor/lifenews.py | 234 +
 yt_dlp/extractor/likee.py | 182 +
 yt_dlp/extractor/limelight.py | 358 +
 yt_dlp/extractor/linkedin.py | 272 +
 yt_dlp/extractor/liputan6.py | 64 +
 yt_dlp/extractor/listennotes.py | 86 +
 yt_dlp/extractor/litv.py | 148 +
 yt_dlp/extractor/livejournal.py | 39 +
 yt_dlp/extractor/livestream.py | 388 +
 yt_dlp/extractor/livestreamfails.py | 37 +
 yt_dlp/extractor/lnkgo.py | 163 +
 yt_dlp/extractor/lovehomeporn.py | 33 +
 yt_dlp/extractor/lrt.py | 108 +
 yt_dlp/extractor/lsm.py | 282 +
 yt_dlp/extractor/lumni.py | 23 +
 yt_dlp/extractor/lynda.py | 330 +
 yt_dlp/extractor/maariv.py | 62 +
 yt_dlp/extractor/magellantv.py | 62 +
 yt_dlp/extractor/magentamusik.py | 62 +
 yt_dlp/extractor/mailru.py | 338 +
 yt_dlp/extractor/mainstreaming.py | 210 +
 yt_dlp/extractor/mangomolo.py | 73 +
 yt_dlp/extractor/manoto.py | 133 +
 yt_dlp/extractor/manyvids.py | 162 +
 yt_dlp/extractor/maoritv.py | 28 +
 yt_dlp/extractor/markiza.py | 124 +
 yt_dlp/extractor/massengeschmacktv.py | 72 +
 yt_dlp/extractor/masters.py | 38 +
 yt_dlp/extractor/matchtv.py | 51 +
 yt_dlp/extractor/mbn.py | 89 +
 yt_dlp/extractor/mdr.py | 184 +
 yt_dlp/extractor/medaltv.py | 162 +
 yt_dlp/extractor/mediaite.py | 104 +
 yt_dlp/extractor/mediaklikk.py | 156 +
 yt_dlp/extractor/medialaan.py | 111 +
 yt_dlp/extractor/mediaset.py | 320 +
 yt_dlp/extractor/mediasite.py | 411 ++
 yt_dlp/extractor/mediastream.py | 226 +
 yt_dlp/extractor/mediaworksnz.py | 103 +
 yt_dlp/extractor/medici.py | 67 +
 yt_dlp/extractor/megaphone.py | 46 +
 yt_dlp/extractor/megatvcom.py | 164 +
 yt_dlp/extractor/meipai.py | 99 +
 yt_dlp/extractor/melonvod.py | 68 +
 yt_dlp/extractor/metacritic.py | 62 +
 yt_dlp/extractor/mgtv.py | 165 +
 yt_dlp/extractor/microsoftembed.py | 65 +
 yt_dlp/extractor/microsoftstream.py | 121 +
 yt_dlp/extractor/microsoftvirtualacademy.py | 189 +
 yt_dlp/extractor/mildom.py | 291 +
 yt_dlp/extractor/minds.py | 193 +
 yt_dlp/extractor/minoto.py | 45 +
 yt_dlp/extractor/mirrativ.py | 118 +
 yt_dlp/extractor/mirrorcouk.py | 98 +
 yt_dlp/extractor/mit.py | 130 +
 yt_dlp/extractor/mitele.py | 82 +
 yt_dlp/extractor/mixch.py | 81 +
 yt_dlp/extractor/mixcloud.py | 379 +
 yt_dlp/extractor/mlb.py | 379 +
 yt_dlp/extractor/mlssoccer.py | 114 +
 yt_dlp/extractor/mocha.py | 64 +
 yt_dlp/extractor/mojvideo.py | 52 +
 yt_dlp/extractor/monstercat.py | 77 +
 yt_dlp/extractor/motherless.py | 297 +
 yt_dlp/extractor/motorsport.py | 52 +
 yt_dlp/extractor/moviepilot.py | 97 +
 yt_dlp/extractor/moview.py | 43 +
 yt_dlp/extractor/moviezine.py | 38 +
 yt_dlp/extractor/movingimage.py | 50 +
 yt_dlp/extractor/msn.py | 168 +
 yt_dlp/extractor/mtv.py | 654 ++
 yt_dlp/extractor/muenchentv.py | 72 +
 yt_dlp/extractor/murrtube.py | 164 +
 yt_dlp/extractor/museai.py | 112 +
 yt_dlp/extractor/musescore.py | 64 +
 yt_dlp/extractor/musicdex.py | 172 +
 yt_dlp/extractor/mx3.py | 171 +
 yt_dlp/extractor/mxplayer.py | 241 +
 yt_dlp/extractor/myspace.py | 195 +
 yt_dlp/extractor/myspass.py | 92 +
 yt_dlp/extractor/myvideoge.py | 81 +
 yt_dlp/extractor/myvidster.py | 27 +
 yt_dlp/extractor/mzaalo.py | 95 +
 yt_dlp/extractor/n1.py | 163 +
 yt_dlp/extractor/nate.py | 120 +
 yt_dlp/extractor/nationalgeographic.py | 83 +
 yt_dlp/extractor/naver.py | 404 ++
 yt_dlp/extractor/nba.py | 419 ++
 yt_dlp/extractor/nbc.py | 851 +++
 yt_dlp/extractor/ndr.py | 471 ++
 yt_dlp/extractor/ndtv.py | 107 +
 yt_dlp/extractor/nebula.py | 468 ++
 yt_dlp/extractor/nekohacker.py | 213 +
 yt_dlp/extractor/nerdcubed.py | 38 +
 yt_dlp/extractor/neteasemusic.py | 615 ++
 yt_dlp/extractor/netverse.py | 281 +
 yt_dlp/extractor/netzkino.py | 85 +
 yt_dlp/extractor/newgrounds.py | 311 +
 yt_dlp/extractor/newspicks.py | 53 +
 yt_dlp/extractor/newsy.py | 47 +
 yt_dlp/extractor/nextmedia.py | 237 +
 yt_dlp/extractor/nexx.py | 525 ++
 yt_dlp/extractor/nfb.py | 300 +
 yt_dlp/extractor/nfhsnetwork.py | 141 +
 yt_dlp/extractor/nfl.py | 373 +
 yt_dlp/extractor/nhk.py | 708 ++
 yt_dlp/extractor/nhl.py | 123 +
 yt_dlp/extractor/nick.py | 224 +
 yt_dlp/extractor/niconico.py | 1061 +++
 yt_dlp/extractor/niconicochannelplus.py | 426 ++
 yt_dlp/extractor/ninaprotocol.py | 225 +
 yt_dlp/extractor/ninecninemedia.py | 130 +
 yt_dlp/extractor/ninegag.py | 148 +
 yt_dlp/extractor/ninenews.py | 72 +
 yt_dlp/extractor/ninenow.py | 122 +
 yt_dlp/extractor/nintendo.py | 131 +
 yt_dlp/extractor/nitter.py | 360 +
 yt_dlp/extractor/nobelprize.py | 59 +
 yt_dlp/extractor/noice.py | 116 +
 yt_dlp/extractor/nonktube.py | 36 +
 yt_dlp/extractor/noodlemagazine.py | 80 +
 yt_dlp/extractor/noovo.py | 101 +
 yt_dlp/extractor/nosnl.py | 115 +
 yt_dlp/extractor/nova.py | 307 +
 yt_dlp/extractor/novaplay.py | 67 +
 yt_dlp/extractor/nowness.py | 142 +
 yt_dlp/extractor/noz.py | 83 +
 yt_dlp/extractor/npo.py | 612 ++
 yt_dlp/extractor/npr.py | 132 +
 yt_dlp/extractor/nrk.py | 875 +++
 yt_dlp/extractor/nrl.py | 27 +
 yt_dlp/extractor/ntvcojp.py | 55 +
 yt_dlp/extractor/ntvde.py | 83 +
 yt_dlp/extractor/ntvru.py | 142 +
 yt_dlp/extractor/nubilesporn.py | 99 +
 yt_dlp/extractor/nuevo.py | 36 +
 yt_dlp/extractor/nuum.py | 199 +
 yt_dlp/extractor/nuvid.py | 99 +
 yt_dlp/extractor/nytimes.py | 420 ++
 yt_dlp/extractor/nzherald.py | 123 +
 yt_dlp/extractor/nzonscreen.py | 93 +
 yt_dlp/extractor/nzz.py | 40 +
 yt_dlp/extractor/odkmedia.py | 105 +
 yt_dlp/extractor/odnoklassniki.py | 464 ++
 yt_dlp/extractor/oftv.py | 54 +
 yt_dlp/extractor/oktoberfesttv.py | 44 +
 yt_dlp/extractor/olympics.py | 65 +
 yt_dlp/extractor/on24.py | 87 +
 yt_dlp/extractor/once.py | 40 +
 yt_dlp/extractor/ondemandkorea.py | 169 +
 yt_dlp/extractor/onefootball.py | 51 +
 yt_dlp/extractor/onenewsnz.py | 111 +
 yt_dlp/extractor/oneplace.py | 43 +
 yt_dlp/extractor/onet.py | 259 +
 yt_dlp/extractor/onionstudios.py | 42 +
 yt_dlp/extractor/opencast.py | 183 +
 yt_dlp/extractor/openload.py | 243 +
 yt_dlp/extractor/openrec.py | 151 +
 yt_dlp/extractor/ora.py | 71 +
 yt_dlp/extractor/orf.py | 630 ++
 yt_dlp/extractor/outsidetv.py | 25 +
 yt_dlp/extractor/owncloud.py | 80 +
 yt_dlp/extractor/packtpub.py | 155 +
 yt_dlp/extractor/palcomp3.py | 143 +
 yt_dlp/extractor/panopto.py | 600 ++
 yt_dlp/extractor/paramountplus.py | 201 +
 yt_dlp/extractor/parler.py | 91 +
 yt_dlp/extractor/parlview.py | 64 +
 yt_dlp/extractor/patreon.py | 454 ++
 yt_dlp/extractor/pbs.py | 757 ++
 yt_dlp/extractor/pearvideo.py | 68 +
 yt_dlp/extractor/peekvids.py | 188 +
 yt_dlp/extractor/peertube.py | 1647 +++++
 yt_dlp/extractor/peertv.py | 52 +
 yt_dlp/extractor/peloton.py | 215 +
 yt_dlp/extractor/performgroup.py | 77 +
 yt_dlp/extractor/periscope.py | 188 +
 yt_dlp/extractor/pgatour.py | 47 +
 yt_dlp/extractor/philharmoniedeparis.py | 97 +
 yt_dlp/extractor/phoenix.py | 130 +
 yt_dlp/extractor/photobucket.py | 43 +
 yt_dlp/extractor/piapro.py | 121 +
 yt_dlp/extractor/piaulizaportal.py | 70 +
 yt_dlp/extractor/picarto.py | 152 +
 yt_dlp/extractor/piksel.py | 174 +
 yt_dlp/extractor/pinkbike.py | 93 +
 yt_dlp/extractor/pinterest.py | 248 +
 yt_dlp/extractor/pixivsketch.py | 118 +
 yt_dlp/extractor/pladform.py | 135 +
 yt_dlp/extractor/planetmarathi.py | 71 +
 yt_dlp/extractor/platzi.py | 213 +
 yt_dlp/extractor/playplustv.py | 100 +
 yt_dlp/extractor/playsuisse.py | 234 +
 yt_dlp/extractor/playtvak.py | 185 +
 yt_dlp/extractor/playwire.py | 72 +
 yt_dlp/extractor/pluralsight.py | 491 ++
 yt_dlp/extractor/plutotv.py | 195 +
 yt_dlp/extractor/podbayfm.py | 75 +
 yt_dlp/extractor/podchaser.py | 97 +
 yt_dlp/extractor/podomatic.py | 74 +
 yt_dlp/extractor/pokemon.py | 136 +
 yt_dlp/extractor/pokergo.py | 106 +
 yt_dlp/extractor/polsatgo.py | 86 +
 yt_dlp/extractor/polskieradio.py | 610 ++
 yt_dlp/extractor/popcorntimes.py | 91 +
 yt_dlp/extractor/popcorntv.py | 72 +
 yt_dlp/extractor/porn91.py | 95 +
 yt_dlp/extractor/pornbox.py | 113 +
 yt_dlp/extractor/pornflip.py | 77 +
 yt_dlp/extractor/pornhub.py | 825 +++
 yt_dlp/extractor/pornotube.py | 83 +
 yt_dlp/extractor/pornovoisines.py | 103 +
 yt_dlp/extractor/pornoxo.py | 55 +
 yt_dlp/extractor/pr0gramm.py | 201 +
 yt_dlp/extractor/prankcast.py | 137 +
 yt_dlp/extractor/premiershiprugby.py | 39 +
 yt_dlp/extractor/presstv.py | 69 +
 yt_dlp/extractor/projectveritas.py | 52 +
 yt_dlp/extractor/prosiebensat1.py | 496 ++
 yt_dlp/extractor/prx.py | 428 ++
 yt_dlp/extractor/puhutv.py | 233 +
 yt_dlp/extractor/puls4.py | 51 +
 yt_dlp/extractor/pyvideo.py | 70 +
 yt_dlp/extractor/qdance.py | 171 +
 yt_dlp/extractor/qingting.py | 47 +
 yt_dlp/extractor/qqmusic.py | 365 +
 yt_dlp/extractor/r7.py | 112 +
 yt_dlp/extractor/radiko.py | 261 +
 yt_dlp/extractor/radiocanada.py | 165 +
 yt_dlp/extractor/radiocomercial.py | 154 +
 yt_dlp/extractor/radiode.py | 50 +
 yt_dlp/extractor/radiofrance.py | 473 ++
 yt_dlp/extractor/radiojavan.py | 81 +
 yt_dlp/extractor/radiokapital.py | 97 +
 yt_dlp/extractor/radiozet.py | 50 +
 yt_dlp/extractor/radlive.py | 180 +
 yt_dlp/extractor/rai.py | 816 +++
 yt_dlp/extractor/raywenderlich.py | 177 +
 yt_dlp/extractor/rbgtum.py | 142 +
 yt_dlp/extractor/rcs.py | 372 +
 yt_dlp/extractor/rcti.py | 373 +
 yt_dlp/extractor/rds.py | 68 +
 yt_dlp/extractor/redbee.py | 380 +
 yt_dlp/extractor/redbulltv.py | 224 +
 yt_dlp/extractor/reddit.py | 353 +
 yt_dlp/extractor/redge.py | 135 +
 yt_dlp/extractor/redgifs.py | 260 +
 yt_dlp/extractor/redtube.py | 144 +
 yt_dlp/extractor/rentv.py | 104 +
 yt_dlp/extractor/restudy.py | 41 +
 yt_dlp/extractor/reuters.py | 66 +
 yt_dlp/extractor/reverbnation.py | 51 +
 yt_dlp/extractor/rheinmaintv.py | 94 +
 yt_dlp/extractor/ridehome.py | 96 +
 yt_dlp/extractor/rinsefm.py | 89 +
 yt_dlp/extractor/rmcdecouverte.py | 71 +
 yt_dlp/extractor/rockstargames.py | 65 +
 yt_dlp/extractor/rokfin.py | 455 ++
 yt_dlp/extractor/roosterteeth.py | 352 +
 yt_dlp/extractor/rottentomatoes.py | 80 +
 yt_dlp/extractor/rozhlas.py | 363 +
 yt_dlp/extractor/rte.py | 162 +
 yt_dlp/extractor/rtl2.py | 95 +
 yt_dlp/extractor/rtlnl.py | 294 +
 yt_dlp/extractor/rtnews.py | 196 +
 yt_dlp/extractor/rtp.py | 97 +
 yt_dlp/extractor/rtrfm.py | 65 +
 yt_dlp/extractor/rts.py | 232 +
 yt_dlp/extractor/rtvcplay.py | 285 +
 yt_dlp/extractor/rtve.py | 344 +
 yt_dlp/extractor/rtvs.py | 85 +
 yt_dlp/extractor/rtvslo.py | 166 +
 yt_dlp/extractor/rudovideo.py | 135 +
 yt_dlp/extractor/rule34video.py | 123 +
 yt_dlp/extractor/rumble.py | 390 ++
 yt_dlp/extractor/rutube.py | 365 +
 yt_dlp/extractor/rutv.py | 203 +
 yt_dlp/extractor/ruutu.py | 262 +
 yt_dlp/extractor/ruv.py | 186 +
 yt_dlp/extractor/s4c.py | 103 +
 yt_dlp/extractor/safari.py | 259 +
 yt_dlp/extractor/saitosan.py | 75 +
 yt_dlp/extractor/samplefocus.py | 97 +
 yt_dlp/extractor/sapo.py | 114 +
 yt_dlp/extractor/sbs.py | 156 +
 yt_dlp/extractor/sbscokr.py | 200 +
 yt_dlp/extractor/screen9.py | 62 +
 yt_dlp/extractor/screencast.py | 117 +
 yt_dlp/extractor/screencastify.py | 70 +
 yt_dlp/extractor/screencastomatic.py | 72 +
 yt_dlp/extractor/scrippsnetworks.py | 155 +
 yt_dlp/extractor/scrolller.py | 102 +
 yt_dlp/extractor/scte.py | 137 +
 yt_dlp/extractor/sejmpl.py | 218 +
 yt_dlp/extractor/senalcolombia.py | 32 +
 yt_dlp/extractor/senategov.py | 200 +
 yt_dlp/extractor/sendtonews.py | 105 +
 yt_dlp/extractor/servus.py | 135 +
 yt_dlp/extractor/sevenplus.py | 132 +
 yt_dlp/extractor/sexu.py | 61 +
 yt_dlp/extractor/seznamzpravy.py | 157 +
 yt_dlp/extractor/shahid.py | 217 +
 yt_dlp/extractor/sharevideos.py | 6 +
 yt_dlp/extractor/shemaroome.py | 102 +
 yt_dlp/extractor/showroomlive.py | 80 +
 yt_dlp/extractor/sibnet.py | 17 +
 yt_dlp/extractor/simplecast.py | 151 +
 yt_dlp/extractor/sina.py | 109 +
 yt_dlp/extractor/sixplay.py | 122 +
 yt_dlp/extractor/skeb.py | 140 +
 yt_dlp/extractor/sky.py | 135 +
 yt_dlp/extractor/skyit.py | 227 +
 yt_dlp/extractor/skylinewebcams.py | 40 +
 yt_dlp/extractor/skynewsarabia.py | 116 +
 yt_dlp/extractor/skynewsau.py | 43 +
 yt_dlp/extractor/slideshare.py | 53 +
 yt_dlp/extractor/slideslive.py | 554 ++
 yt_dlp/extractor/slutload.py | 63 +
 yt_dlp/extractor/smotrim.py | 65 +
 yt_dlp/extractor/snotr.py | 68 +
 yt_dlp/extractor/sohu.py | 293 +
 yt_dlp/extractor/sonyliv.py | 220 +
 yt_dlp/extractor/soundcloud.py | 948 +++
 yt_dlp/extractor/soundgasm.py | 74 +
 yt_dlp/extractor/southpark.py | 188 +
 yt_dlp/extractor/sovietscloset.py | 207 +
 yt_dlp/extractor/spankbang.py | 195 +
 yt_dlp/extractor/spiegel.py | 51 +
 yt_dlp/extractor/spike.py | 46 +
 yt_dlp/extractor/sport5.py | 86 +
 yt_dlp/extractor/sportbox.py | 88 +
 yt_dlp/extractor/sportdeutschland.py | 142 +
 yt_dlp/extractor/spotify.py | 167 +
 yt_dlp/extractor/spreaker.py | 173 +
 yt_dlp/extractor/springboardplatform.py | 113 +
 yt_dlp/extractor/sprout.py | 61 +
 yt_dlp/extractor/srgssr.py | 247 +
 yt_dlp/extractor/srmediathek.py | 57 +
 yt_dlp/extractor/stacommu.py | 231 +
 yt_dlp/extractor/stageplus.py | 515 ++
 yt_dlp/extractor/stanfordoc.py | 89 +
 yt_dlp/extractor/startrek.py | 76 +
 yt_dlp/extractor/startv.py | 100 +
 yt_dlp/extractor/steam.py | 170 +
 yt_dlp/extractor/stitcher.py | 142 +
 yt_dlp/extractor/storyfire.py | 133 +
 yt_dlp/extractor/streamable.py | 103 +
 yt_dlp/extractor/streamcz.py | 122 +
 yt_dlp/extractor/streetvoice.py | 97 +
 yt_dlp/extractor/stretchinternet.py | 35 +
 yt_dlp/extractor/stripchat.py | 66 +
 yt_dlp/extractor/stv.py | 89 +
 yt_dlp/extractor/substack.py | 108 +
 yt_dlp/extractor/sunporno.py | 75 +
 yt_dlp/extractor/sverigesradio.py | 149 +
 yt_dlp/extractor/svt.py | 489 ++
 yt_dlp/extractor/swearnet.py | 79 +
 yt_dlp/extractor/syfy.py | 58 +
 yt_dlp/extractor/syvdk.py | 33 +
 yt_dlp/extractor/sztvhu.py | 38 +
 yt_dlp/extractor/tagesschau.py | 164 +
 yt_dlp/extractor/tass.py | 59 +
 yt_dlp/extractor/tbs.py | 89 +
 yt_dlp/extractor/tbsjp.py | 152 +
 yt_dlp/extractor/teachable.py | 296 +
 yt_dlp/extractor/teachertube.py | 126 +
 yt_dlp/extractor/teachingchannel.py | 32 +
 yt_dlp/extractor/teamcoco.py | 280 +
 yt_dlp/extractor/teamtreehouse.py | 134 +
 yt_dlp/extractor/ted.py | 236 +
 yt_dlp/extractor/tele13.py | 84 +
 yt_dlp/extractor/tele5.py | 89 +
 yt_dlp/extractor/telebruxelles.py | 72 +
 yt_dlp/extractor/telecaribe.py | 91 +
 yt_dlp/extractor/telecinco.py | 146 +
 yt_dlp/extractor/telegraaf.py | 86 +
 yt_dlp/extractor/telegram.py | 136 +
 yt_dlp/extractor/telemb.py | 75 +
 yt_dlp/extractor/telemundo.py | 50 +
 yt_dlp/extractor/telequebec.py | 237 +
 yt_dlp/extractor/teletask.py | 52 +
 yt_dlp/extractor/telewebion.py | 133 +
 yt_dlp/extractor/tempo.py | 114 +
 yt_dlp/extractor/tencent.py | 490 ++
 yt_dlp/extractor/tennistv.py | 155 +
 yt_dlp/extractor/tenplay.py | 170 +
 yt_dlp/extractor/testurl.py | 50 +
 yt_dlp/extractor/tf1.py | 101 +
 yt_dlp/extractor/tfo.py | 48 +
 yt_dlp/extractor/theguardian.py | 135 +
 yt_dlp/extractor/theholetv.py | 35 +
 yt_dlp/extractor/theintercept.py | 46 +
 yt_dlp/extractor/theplatform.py | 429 ++
 yt_dlp/extractor/thestar.py | 33 +
 yt_dlp/extractor/thesun.py | 43 +
 yt_dlp/extractor/theweatherchannel.py | 99 +
 yt_dlp/extractor/thisamericanlife.py | 38 +
 yt_dlp/extractor/thisoldhouse.py | 104 +
 yt_dlp/extractor/thisvid.py | 226 +
 yt_dlp/extractor/threeqsdn.py | 156 +
 yt_dlp/extractor/threespeak.py | 93 +
 yt_dlp/extractor/tiktok.py | 1317 ++++
 yt_dlp/extractor/tmz.py | 193 +
 yt_dlp/extractor/tnaflix.py | 336 +
 yt_dlp/extractor/toggle.py | 228 +
 yt_dlp/extractor/toggo.py | 82 +
 yt_dlp/extractor/tonline.py | 53 +
 yt_dlp/extractor/toongoggles.py | 76 +
 yt_dlp/extractor/toutv.py | 87 +
 yt_dlp/extractor/toypics.py | 89 +
 yt_dlp/extractor/traileraddict.py | 61 +
 yt_dlp/extractor/triller.py | 329 +
 yt_dlp/extractor/trovo.py | 342 +
 yt_dlp/extractor/trtcocuk.py | 48 +
 yt_dlp/extractor/trtworld.py | 101 +
 yt_dlp/extractor/trueid.py | 136 +
 yt_dlp/extractor/trunews.py | 32 +
 yt_dlp/extractor/truth.py | 68 +
 yt_dlp/extractor/trutv.py | 70 +
 yt_dlp/extractor/tube8.py | 170 +
 yt_dlp/extractor/tubetugraz.py | 252 +
 yt_dlp/extractor/tubitv.py | 168 +
 yt_dlp/extractor/tumblr.py | 387 +
 yt_dlp/extractor/tunein.py | 234 +
 yt_dlp/extractor/turner.py | 256 +
 yt_dlp/extractor/tv2.py | 324 +
 yt_dlp/extractor/tv24ua.py | 78 +
 yt_dlp/extractor/tv2dk.py | 172 +
 yt_dlp/extractor/tv2hu.py | 104 +
 yt_dlp/extractor/tv4.py | 149 +
 yt_dlp/extractor/tv5mondeplus.py | 190 +
 yt_dlp/extractor/tv5unis.py | 116 +
 yt_dlp/extractor/tva.py | 85 +
 yt_dlp/extractor/tvanouvelles.py | 62 +
 yt_dlp/extractor/tvc.py | 97 +
 yt_dlp/extractor/tver.py | 103 +
 yt_dlp/extractor/tvigle.py | 133 +
 yt_dlp/extractor/tviplayer.py | 78 +
 yt_dlp/extractor/tvland.py | 37 +
 yt_dlp/extractor/tvn24.py | 100 +
 yt_dlp/extractor/tvnoe.py | 46 +
 yt_dlp/extractor/tvopengr.py | 116 +
 yt_dlp/extractor/tvp.py | 642 ++
 yt_dlp/extractor/tvplay.py | 306 +
 yt_dlp/extractor/tvplayer.py | 80 +
 yt_dlp/extractor/tweakers.py | 59 +
 yt_dlp/extractor/twentymin.py | 80 +
 yt_dlp/extractor/twentythreevideo.py | 76 +
 yt_dlp/extractor/twitcasting.py | 306 +
 yt_dlp/extractor/twitch.py | 1211 ++++
 yt_dlp/extractor/twitter.py | 1875 +++++
 yt_dlp/extractor/txxx.py | 438 ++
 yt_dlp/extractor/udemy.py | 474 ++
 yt_dlp/extractor/udn.py | 98 +
 yt_dlp/extractor/ufctv.py | 13 +
 yt_dlp/extractor/ukcolumn.py | 71 +
 yt_dlp/extractor/uktvplay.py | 36 +
 yt_dlp/extractor/umg.py | 98 +
 yt_dlp/extractor/unistra.py | 64 +
 yt_dlp/extractor/unity.py | 31 +
 yt_dlp/extractor/unsupported.py | 189 +
 yt_dlp/extractor/uol.py | 138 +
 yt_dlp/extractor/uplynk.py | 88 +
 yt_dlp/extractor/urort.py | 60 +
 yt_dlp/extractor/urplay.py | 164 +
 yt_dlp/extractor/usanetwork.py | 21 +
 yt_dlp/extractor/usatoday.py | 60 +
 yt_dlp/extractor/ustream.py | 275 +
 yt_dlp/extractor/ustudio.py | 119 +
 yt_dlp/extractor/utreon.py | 98 +
 yt_dlp/extractor/varzesh3.py | 73 +
 yt_dlp/extractor/vbox7.py | 97 +
 yt_dlp/extractor/veo.py | 76 +
 yt_dlp/extractor/veoh.py | 188 +
 yt_dlp/extractor/vesti.py | 119 +
 yt_dlp/extractor/vevo.py | 353 +
 yt_dlp/extractor/vgtv.py | 311 +
 yt_dlp/extractor/vh1.py | 33 +
 yt_dlp/extractor/vice.py | 313 +
 yt_dlp/extractor/viddler.py | 135 +
 yt_dlp/extractor/videa.py | 188 +
 yt_dlp/extractor/videocampus_sachsen.py | 253 +
 yt_dlp/extractor/videodetective.py | 27 +
 yt_dlp/extractor/videofyme.py | 51 +
 yt_dlp/extractor/videoken.py | 337 +
 yt_dlp/extractor/videomore.py | 307 +
 yt_dlp/extractor/videopress.py | 89 +
 yt_dlp/extractor/vidio.py | 309 +
 yt_dlp/extractor/vidlii.py | 154 +
 yt_dlp/extractor/vidly.py | 83 +
 yt_dlp/extractor/viewlift.py | 362 +
 yt_dlp/extractor/viidea.py | 199 +
 yt_dlp/extractor/viki.py | 346 +
 yt_dlp/extractor/vimeo.py | 1455 ++++
 yt_dlp/extractor/vimm.py | 66 +
 yt_dlp/extractor/vine.py | 151 +
 yt_dlp/extractor/viously.py | 60 +
 yt_dlp/extractor/viqeo.py | 87 +
 yt_dlp/extractor/viu.py | 542 ++
 yt_dlp/extractor/vk.py | 842 +++
 yt_dlp/extractor/vocaroo.py | 63 +
 yt_dlp/extractor/vodpl.py | 29 +
 yt_dlp/extractor/vodplatform.py | 37 +
 yt_dlp/extractor/voicy.py | 146 +
 yt_dlp/extractor/volejtv.py | 40 +
 yt_dlp/extractor/voot.py | 212 +
 yt_dlp/extractor/voxmedia.py | 215 +
 yt_dlp/extractor/vrt.py | 427 ++
 yt_dlp/extractor/vtm.py | 60 +
 yt_dlp/extractor/vuclip.py | 68 +
 yt_dlp/extractor/vvvvid.py | 336 +
 yt_dlp/extractor/walla.py | 82 +
 yt_dlp/extractor/washingtonpost.py | 123 +
 yt_dlp/extractor/wat.py | 119 +
 yt_dlp/extractor/wdr.py | 384 +
 yt_dlp/extractor/webcamerapl.py | 44 +
 yt_dlp/extractor/webcaster.py | 92 +
 yt_dlp/extractor/webofstories.py | 155 +
 yt_dlp/extractor/weibo.py | 251 +
 yt_dlp/extractor/weiqitv.py | 50 +
 yt_dlp/extractor/weverse.py | 608 ++
 yt_dlp/extractor/wevidi.py | 108 +
 yt_dlp/extractor/weyyak.py | 86 +
 yt_dlp/extractor/whowatch.py | 96 +
 yt_dlp/extractor/whyp.py | 50 +
 yt_dlp/extractor/wikimedia.py | 55 +
 yt_dlp/extractor/wimbledon.py | 61 +
 yt_dlp/extractor/wimtv.py | 150 +
 yt_dlp/extractor/wistia.py | 394 ++
 yt_dlp/extractor/wordpress.py | 154 +
 yt_dlp/extractor/worldstarhiphop.py | 38 +
 yt_dlp/extractor/wppilot.py | 173 +
 yt_dlp/extractor/wrestleuniverse.py | 304 +
 yt_dlp/extractor/wsj.py | 120 +
 yt_dlp/extractor/wwe.py | 138 +
 yt_dlp/extractor/wykop.py | 268 +
 yt_dlp/extractor/xanimu.py | 51 +
 yt_dlp/extractor/xboxclips.py | 62 +
 yt_dlp/extractor/xfileshare.py | 198 +
 yt_dlp/extractor/xhamster.py | 465 ++
 yt_dlp/extractor/ximalaya.py | 167 +
 yt_dlp/extractor/xinpianchang.py | 92 +
 yt_dlp/extractor/xminus.py | 77 +
 yt_dlp/extractor/xnxx.py | 83 +
 yt_dlp/extractor/xstream.py | 115 +
 yt_dlp/extractor/xvideos.py | 180 +
 yt_dlp/extractor/xxxymovies.py | 77 +
 yt_dlp/extractor/yahoo.py | 430 ++
 yt_dlp/extractor/yandexdisk.py | 142 +
 yt_dlp/extractor/yandexmusic.py | 454 ++
 yt_dlp/extractor/yandexvideo.py | 390 ++
 yt_dlp/extractor/yapfiles.py | 90 +
 yt_dlp/extractor/yappy.py | 128 +
 yt_dlp/extractor/yle_areena.py | 134 +
 yt_dlp/extractor/youjizz.py | 90 +
 yt_dlp/extractor/youku.py | 290 +
 yt_dlp/extractor/younow.py | 201 +
 yt_dlp/extractor/youporn.py | 198 +
 yt_dlp/extractor/yourporn.py | 65 +
 yt_dlp/extractor/yourupload.py | 43 +
 yt_dlp/extractor/youtube.py | 7387 ++++++++++++++++++++
 yt_dlp/extractor/zaiko.py | 139 +
 yt_dlp/extractor/zapiks.py | 106 +
 yt_dlp/extractor/zattoo.py | 865 +++
 yt_dlp/extractor/zdf.py | 442 ++
 yt_dlp/extractor/zee5.py | 270 +
 yt_dlp/extractor/zeenews.py | 59 +
 yt_dlp/extractor/zenporn.py | 118 +
 yt_dlp/extractor/zetland.py | 71 +
 yt_dlp/extractor/zhihu.py | 65 +
 yt_dlp/extractor/zingmp3.py | 628 ++
 yt_dlp/extractor/zoom.py | 164 +
 yt_dlp/extractor/zype.py | 135 +
 yt_dlp/jsinterp.py | 853 +++
 yt_dlp/minicurses.py | 182 +
 yt_dlp/networking/__init__.py | 30 +
 yt_dlp/networking/_helper.py | 283 +
 yt_dlp/networking/_requests.py | 408 ++
 yt_dlp/networking/_urllib.py | 422 ++
 yt_dlp/networking/_websockets.py | 173 +
 yt_dlp/networking/common.py | 565 ++
 yt_dlp/networking/exceptions.py | 103 +
 yt_dlp/networking/websocket.py | 23 +
 yt_dlp/options.py | 1920 +++++
 yt_dlp/plugins.py | 176 +
 yt_dlp/postprocessor/__init__.py | 47 +
 yt_dlp/postprocessor/common.py | 215 +
 yt_dlp/postprocessor/embedthumbnail.py | 227 +
 yt_dlp/postprocessor/exec.py | 41 +
 yt_dlp/postprocessor/ffmpeg.py | 1192 ++++
 yt_dlp/postprocessor/metadataparser.py | 125 +
 yt_dlp/postprocessor/modify_chapters.py | 336 +
 yt_dlp/postprocessor/movefilesafterdownload.py | 53 +
 yt_dlp/postprocessor/sponskrub.py | 98 +
 yt_dlp/postprocessor/sponsorblock.py | 104 +
 yt_dlp/postprocessor/xattrpp.py | 63 +
 yt_dlp/socks.py | 274 +
 yt_dlp/update.py | 619 ++
 yt_dlp/utils/__init__.py | 10 +
 yt_dlp/utils/_deprecated.py | 39 +
 yt_dlp/utils/_legacy.py | 315 +
 yt_dlp/utils/_utils.py | 5445 +++++++++++++++
 yt_dlp/utils/networking.py | 164 +
 yt_dlp/utils/progress.py | 109 +
 yt_dlp/utils/traversal.py | 276 +
 yt_dlp/version.py | 15 +
 yt_dlp/webvtt.py | 399 ++
 1205 files changed, 244386 insertions(+)

 create mode 100644 .editorconfig
 create mode 100644 .gitattributes
 create mode 100644 .github/FUNDING.yml
 create mode 100644 .github/ISSUE_TEMPLATE/1_broken_site.yml
 create mode 100644 .github/ISSUE_TEMPLATE/2_site_support_request.yml
 create mode 100644 .github/ISSUE_TEMPLATE/3_site_feature_request.yml
 create mode 100644 .github/ISSUE_TEMPLATE/4_bug_report.yml
 create mode 100644 .github/ISSUE_TEMPLATE/5_feature_request.yml
 create mode 100644 .github/ISSUE_TEMPLATE/6_question.yml
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml
 create mode 100644 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml
 create mode 100644 .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml
 create mode 100644 .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml
 create mode 100644 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
 create mode 100644 .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml
 create mode 100644 .github/ISSUE_TEMPLATE_tmpl/6_question.yml
 create mode 100644 .github/PULL_REQUEST_TEMPLATE.md
 create mode 100644 .github/banner.svg
 create mode 100644 .github/workflows/build.yml
 create mode 100644 .github/workflows/codeql.yml
 create mode 100644 .github/workflows/core.yml
 create mode 100644 .github/workflows/download.yml
 create mode 100644 .github/workflows/quick-test.yml
 create mode 100644 .github/workflows/release-master.yml
 create mode 100644 .github/workflows/release-nightly.yml
 create mode 100644 .github/workflows/release.yml
 create mode 100644 .gitignore
 create mode 100644 CONTRIBUTING.md
 create mode 100644 CONTRIBUTORS
 create mode 100644 Changelog.md
 create mode 100644 Collaborators.md
 create mode 100644 LICENSE
 create mode 100644 Makefile
 create mode 100644 README.md
 create mode 100644 bundle/__init__.py
 create mode 100755 bundle/py2exe.py
 create mode 100755 bundle/pyinstaller.py
 create mode 100644 devscripts/__init__.py
 create mode 100644 devscripts/bash-completion.in
 create mode 100755 devscripts/bash-completion.py
 create mode 100644 devscripts/changelog_override.json
 create mode 100644 devscripts/changelog_override.schema.json
 create mode 100644 devscripts/check-porn.py
 create mode 100644 devscripts/cli_to_api.py
 create mode 100644 devscripts/fish-completion.in
 create mode 100755 devscripts/fish-completion.py
 create mode 100644 devscripts/generate_aes_testdata.py
 create mode 100755 devscripts/install_deps.py
 create mode 100644 devscripts/lazy_load_template.py
 create mode 100644 devscripts/logo.ico
 create mode 100644 devscripts/make_changelog.py
 create mode 100755 devscripts/make_contributing.py
 create mode 100644 devscripts/make_issue_template.py
 create mode 100644 devscripts/make_lazy_extractors.py
 create mode 100755 devscripts/make_readme.py
 create mode 100644 devscripts/make_supportedsites.py
 create mode 100644 devscripts/prepare_manpage.py
 create mode 100644 devscripts/run_tests.bat
 create mode 100755 devscripts/run_tests.py
 create mode 100755 devscripts/run_tests.sh
 create mode 100644 devscripts/set-variant.py
 create mode 100755 devscripts/tomlparse.py
 create mode 100644 devscripts/update-version.py
 create mode 100644 devscripts/utils.py
 create mode 100644 devscripts/zsh-completion.in
 create mode 100755 devscripts/zsh-completion.py
 create mode 100644 public.key
 create mode 100755 pyinst.py
 create mode 100644 pyproject.toml
 create mode 100644 setup.cfg
 create mode 100755 setup.py
 create mode 100644 supportedsites.md
 create mode 100644 test/__init__.py
 create mode 100644 test/conftest.py
 create mode 100644 test/helper.py
 create mode 100644 test/parameters.json
 create mode 100644 test/test_InfoExtractor.py
 create mode 100644 test/test_YoutubeDL.py
 create mode 100644 test/test_YoutubeDLCookieJar.py
 create mode 100644 test/test_aes.py
 create mode 100644 test/test_age_restriction.py
 create mode 100644 test/test_all_urls.py
 create mode 100644 test/test_cache.py
 create mode 100644 test/test_compat.py
 create mode 100644 test/test_config.py
 create mode 100644 test/test_cookies.py
 create mode 100755 test/test_download.py
 create mode 100644 test/test_downloader_external.py
 create mode 100644 test/test_downloader_http.py
 create mode 100644 test/test_execution.py
 create mode 100644 test/test_iqiyi_sdk_interpreter.py
 create mode 100644 test/test_jsinterp.py
 create mode 100644 test/test_netrc.py
 create mode 100644 test/test_networking.py
 create mode 100644 test/test_networking_utils.py
 create mode 100644 test/test_overwrites.py
 create mode 100644 test/test_plugins.py
 create mode 100644 test/test_post_hooks.py
 create mode 100644 test/test_postprocessors.py
 create mode 100644 test/test_socks.py
 create mode 100644 test/test_subtitles.py
 create mode 100644 test/test_update.py
 create mode 100644 test/test_utils.py
 create mode 100644 test/test_verbose_output.py
 create mode 100644 test/test_websockets.py
 create mode 100644 test/test_write_annotations.py.disabled
 create mode 100644 test/test_youtube_lists.py
 create mode 100644 test/test_youtube_misc.py
 create mode 100644 test/test_youtube_signature.py
 create mode 100644 test/testcert.pem
 create mode 100644 test/testdata/certificate/ca.crt
 create mode 100644 test/testdata/certificate/ca.key
 create mode 100644 test/testdata/certificate/ca.srl
 create mode 100644 test/testdata/certificate/client.crt
 create mode 100644 test/testdata/certificate/client.csr
 create mode 100644 test/testdata/certificate/client.key
 create mode 100644 test/testdata/certificate/clientencrypted.key
 create mode 100644 test/testdata/certificate/clientwithencryptedkey.crt
 create mode 100644 test/testdata/certificate/clientwithkey.crt
 create mode 100644 test/testdata/certificate/instructions.md
 create mode 100644 test/testdata/cookies/httponly_cookies.txt
 create mode 100644 test/testdata/cookies/malformed_cookies.txt
 create mode 100644 test/testdata/cookies/session_cookies.txt
 create mode 100644 test/testdata/f4m/custom_base_url.f4m
 create mode 100644 test/testdata/ism/ec-3_test.Manifest
 create mode 100644 test/testdata/ism/sintel.Manifest
 create mode 100644 test/testdata/m3u8/bipbop_16x9.m3u8
 create mode 100644 test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8
 create mode 100644 test/testdata/mpd/float_duration.mpd
 create mode 100644 test/testdata/mpd/subtitles.mpd
 create mode 100644 test/testdata/mpd/unfragmented.mpd
 create mode 100644 test/testdata/mpd/urls_only.mpd
 create mode 100644 test/testdata/thumbnails/foo %d bar/foo_%d.webp
 create mode 100644 test/testdata/xspf/foo_xspf.xspf
 create mode 100644 test/testdata/yt_dlp_plugins/extractor/_ignore.py
 create mode 100644 test/testdata/yt_dlp_plugins/extractor/ignore.py
 create mode 100644 test/testdata/yt_dlp_plugins/extractor/normal.py
 create mode 100644 test/testdata/yt_dlp_plugins/postprocessor/normal.py
 create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py
 create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py
 create mode 100644 yt-dlp.cmd
 create mode 100755 yt-dlp.sh
 create mode 100644 yt_dlp/YoutubeDL.py
 create mode 100644 yt_dlp/__init__.py
 create mode 100644 yt_dlp/__main__.py
 create mode 100644 yt_dlp/__pyinstaller/__init__.py
 create mode 100644 yt_dlp/__pyinstaller/hook-yt_dlp.py
 create mode 100644 yt_dlp/aes.py
 create mode 100644 yt_dlp/cache.py
 create mode 100644 yt_dlp/compat/__init__.py
 create mode 100644 yt_dlp/compat/_deprecated.py
 create mode 100644 yt_dlp/compat/_legacy.py
 create mode 100644 yt_dlp/compat/compat_utils.py
 create mode 100644 yt_dlp/compat/functools.py
 create mode 100644 yt_dlp/compat/imghdr.py
 create mode 100644 yt_dlp/compat/shutil.py
 create mode 100644 yt_dlp/compat/types.py
 create mode 100644 yt_dlp/compat/urllib/__init__.py
 create mode 100644 yt_dlp/compat/urllib/request.py
 create mode 100644 yt_dlp/cookies.py
 create mode 100644 yt_dlp/dependencies/Cryptodome.py
 create mode 100644 yt_dlp/dependencies/__init__.py
 create mode 100644 yt_dlp/downloader/__init__.py
 create mode 100644 yt_dlp/downloader/common.py
 create mode 100644 yt_dlp/downloader/dash.py
 create mode 100644 yt_dlp/downloader/external.py
 create mode 100644 yt_dlp/downloader/f4m.py
 create mode 100644 yt_dlp/downloader/fc2.py
 create mode 100644 yt_dlp/downloader/fragment.py
 create mode 100644 yt_dlp/downloader/hls.py
 create mode 100644 yt_dlp/downloader/http.py
 create mode 100644 yt_dlp/downloader/ism.py
 create mode 100644 yt_dlp/downloader/mhtml.py
 create mode 100644 yt_dlp/downloader/niconico.py
 create mode 100644 yt_dlp/downloader/rtmp.py
 create mode 100644 yt_dlp/downloader/rtsp.py
 create mode 100644 yt_dlp/downloader/websocket.py
 create mode 100644 yt_dlp/downloader/youtube_live_chat.py
 create mode 100644 yt_dlp/extractor/__init__.py
 create mode 100644 yt_dlp/extractor/_extractors.py
 create mode 100644 yt_dlp/extractor/abc.py
 create mode 100644 yt_dlp/extractor/abcnews.py
 create mode 100644 yt_dlp/extractor/abcotvs.py
 create mode 100644 yt_dlp/extractor/abematv.py
 create mode 100644 yt_dlp/extractor/academicearth.py
 create mode 100644 yt_dlp/extractor/acast.py
 create mode 100644 yt_dlp/extractor/acfun.py
 create mode 100644 yt_dlp/extractor/adn.py
 create mode 100644 yt_dlp/extractor/adobeconnect.py
 create mode 100644 yt_dlp/extractor/adobepass.py
 create mode 100644 yt_dlp/extractor/adobetv.py
 create mode 100644 yt_dlp/extractor/adultswim.py
 create mode 100644 yt_dlp/extractor/aenetworks.py
 create mode 100644 yt_dlp/extractor/aeonco.py
 create mode 100644 yt_dlp/extractor/afreecatv.py
 create mode 100644 yt_dlp/extractor/agora.py
 create mode 100644 yt_dlp/extractor/airtv.py
 create mode 100644 yt_dlp/extractor/aitube.py
 create mode 100644 yt_dlp/extractor/aliexpress.py
 create mode 100644 yt_dlp/extractor/aljazeera.py
 create mode 100644 yt_dlp/extractor/allocine.py
 create mode 100644 yt_dlp/extractor/allstar.py
 create mode 100644 yt_dlp/extractor/alphaporno.py
 create mode 100644 yt_dlp/extractor/alsace20tv.py
 create mode 100644 yt_dlp/extractor/altcensored.py
 create mode 100644 yt_dlp/extractor/alura.py
 create mode 100644 yt_dlp/extractor/amadeustv.py
 create mode 100644 yt_dlp/extractor/amara.py
 create mode 100644 yt_dlp/extractor/amazon.py
 create mode 100644 yt_dlp/extractor/amazonminitv.py
 create mode 100644 yt_dlp/extractor/amcnetworks.py
 create mode 100644 yt_dlp/extractor/americastestkitchen.py
 create mode 100644 yt_dlp/extractor/amp.py
 create mode 100644 yt_dlp/extractor/anchorfm.py
 create mode 100644 yt_dlp/extractor/angel.py
 create mode 100644 yt_dlp/extractor/antenna.py
 create mode 100644 yt_dlp/extractor/anvato.py
 create mode 100644 yt_dlp/extractor/aol.py
 create mode 100644 yt_dlp/extractor/apa.py
 create mode 100644 yt_dlp/extractor/aparat.py
 create mode 100644 yt_dlp/extractor/appleconnect.py
 create mode 100644 yt_dlp/extractor/applepodcasts.py
 create mode 100644 yt_dlp/extractor/appletrailers.py
 create mode 100644 yt_dlp/extractor/archiveorg.py
 create mode 100644 yt_dlp/extractor/arcpublishing.py
 create mode 100644 yt_dlp/extractor/ard.py
 create mode 100644 yt_dlp/extractor/arkena.py
 create mode 100644 yt_dlp/extractor/arnes.py
 create mode 100644 yt_dlp/extractor/art19.py
 create mode 100644 yt_dlp/extractor/arte.py
 create mode 100644 yt_dlp/extractor/asobichannel.py
 create mode 100644 yt_dlp/extractor/atresplayer.py
 create mode 100644 yt_dlp/extractor/atscaleconf.py
 create mode 100644 yt_dlp/extractor/atvat.py
 create mode 100644 yt_dlp/extractor/audimedia.py
 create mode 100644 yt_dlp/extractor/audioboom.py
 create mode 100644 yt_dlp/extractor/audiodraft.py
 create mode 100644 yt_dlp/extractor/audiomack.py
 create mode 100644 yt_dlp/extractor/audius.py
 create mode 100644 yt_dlp/extractor/awaan.py
 create mode 100644 yt_dlp/extractor/aws.py
 create mode 100644 yt_dlp/extractor/axs.py
 create mode 100644 yt_dlp/extractor/azmedien.py
 create mode 100644 yt_dlp/extractor/baidu.py
 create mode 100644 yt_dlp/extractor/banbye.py
 create mode 100644 yt_dlp/extractor/bandaichannel.py
 create mode 100644 yt_dlp/extractor/bandcamp.py
 create mode 100644 yt_dlp/extractor/bannedvideo.py
 create mode 100644 yt_dlp/extractor/bbc.py
 create mode 100644 yt_dlp/extractor/beatbump.py
 create mode 100644 yt_dlp/extractor/beatport.py
 create mode 100644 yt_dlp/extractor/beeg.py
 create mode 100644 yt_dlp/extractor/behindkink.py
 create mode 100644 yt_dlp/extractor/bellmedia.py
 create mode 100644 yt_dlp/extractor/berufetv.py
 create mode 100644 yt_dlp/extractor/bet.py
 create mode 100644 yt_dlp/extractor/bfi.py
 create mode 100644 yt_dlp/extractor/bfmtv.py
 create mode 100644 yt_dlp/extractor/bibeltv.py
 create mode 100644 yt_dlp/extractor/bigflix.py
 create mode 100644 yt_dlp/extractor/bigo.py
 create mode 100644 yt_dlp/extractor/bild.py
 create mode 100644 yt_dlp/extractor/bilibili.py
 create mode 100644 yt_dlp/extractor/biobiochiletv.py
 create mode 100644 yt_dlp/extractor/bitchute.py
 create mode 100644 yt_dlp/extractor/blackboardcollaborate.py
 create mode 100644 yt_dlp/extractor/bleacherreport.py
 create mode 100644 yt_dlp/extractor/blerp.py
 create mode 100644 yt_dlp/extractor/blogger.py
 create mode 100644 yt_dlp/extractor/bloomberg.py
 create mode 100644 yt_dlp/extractor/bokecc.py
 create mode 100644 yt_dlp/extractor/bongacams.py
 create mode 100644 yt_dlp/extractor/boosty.py
 create mode 100644 yt_dlp/extractor/bostonglobe.py
 create mode 100644 yt_dlp/extractor/box.py
 create mode 100644 yt_dlp/extractor/boxcast.py
 create mode 100644 yt_dlp/extractor/bpb.py
 create mode 100644 yt_dlp/extractor/br.py
 create mode 100644 yt_dlp/extractor/brainpop.py
 create mode 100644 yt_dlp/extractor/bravotv.py
 create mode 100644 yt_dlp/extractor/breitbart.py
 create mode 100644 yt_dlp/extractor/brightcove.py
 create mode 100644 yt_dlp/extractor/brilliantpala.py
 create mode 100644 yt_dlp/extractor/bundesliga.py
 create mode 100644 yt_dlp/extractor/bundestag.py
 create mode 100644 yt_dlp/extractor/businessinsider.py
 create mode 100644 yt_dlp/extractor/buzzfeed.py
 create mode 100644 yt_dlp/extractor/byutv.py
 create mode 100644 yt_dlp/extractor/c56.py
 create mode 100644 yt_dlp/extractor/cableav.py
 create mode 100644 yt_dlp/extractor/callin.py
 create mode 100644 yt_dlp/extractor/caltrans.py
 create mode 100644 yt_dlp/extractor/cam4.py
 create mode 100644 yt_dlp/extractor/camdemy.py
 create mode 100644 yt_dlp/extractor/camfm.py
 create mode 100644 yt_dlp/extractor/cammodels.py
 create mode 100644 yt_dlp/extractor/camsoda.py
 create mode 100644 yt_dlp/extractor/camtasia.py
 create mode 100644 yt_dlp/extractor/canal1.py
 create mode 100644 yt_dlp/extractor/canalalpha.py
 create mode 100644 yt_dlp/extractor/canalc2.py
 create mode 100644 yt_dlp/extractor/canalplus.py
 create mode 100644 yt_dlp/extractor/caracoltv.py
 create mode 100644 yt_dlp/extractor/cartoonnetwork.py
 create mode 100644 yt_dlp/extractor/cbc.py
 create mode 100644 yt_dlp/extractor/cbs.py
 create mode 100644 yt_dlp/extractor/cbsnews.py
 create mode 100644 yt_dlp/extractor/cbssports.py
 create mode 100644 yt_dlp/extractor/ccc.py
 create mode 100644 yt_dlp/extractor/ccma.py
 create mode 100644 yt_dlp/extractor/cctv.py
 create mode 100644 yt_dlp/extractor/cda.py
 create mode 100644 yt_dlp/extractor/cellebrite.py
 create mode 100644 yt_dlp/extractor/ceskatelevize.py
 create mode 100644 yt_dlp/extractor/cgtn.py
 create mode 100644 yt_dlp/extractor/charlierose.py
 create mode 100644 yt_dlp/extractor/chaturbate.py
 create mode 100644 yt_dlp/extractor/chilloutzone.py
 create mode 100644 yt_dlp/extractor/chzzk.py
 create mode 100644 yt_dlp/extractor/cinemax.py
 create mode 100644 yt_dlp/extractor/cinetecamilano.py
 create mode 100644 yt_dlp/extractor/cineverse.py
 create mode 100644 yt_dlp/extractor/ciscolive.py
 create mode 100644 yt_dlp/extractor/ciscowebex.py
 create mode 100644 yt_dlp/extractor/cjsw.py
 create mode 100644 yt_dlp/extractor/clipchamp.py
 create mode 100644 yt_dlp/extractor/clippit.py
 create mode 100644 yt_dlp/extractor/cliprs.py
 create mode 100644 yt_dlp/extractor/closertotruth.py
 create mode 100644 yt_dlp/extractor/cloudflarestream.py
 create mode 100644 yt_dlp/extractor/cloudycdn.py
 create mode 100644 yt_dlp/extractor/clubic.py
 create mode 100644 yt_dlp/extractor/clyp.py
 create mode 100644 yt_dlp/extractor/cmt.py
 create mode 100644 yt_dlp/extractor/cnbc.py
 create mode 100644 yt_dlp/extractor/cnn.py
 create mode 100644 yt_dlp/extractor/comedycentral.py
 create mode 100644 yt_dlp/extractor/common.py
 create mode 100644 yt_dlp/extractor/commonmistakes.py
 create mode 100644 yt_dlp/extractor/commonprotocols.py
 create mode 100644 yt_dlp/extractor/condenast.py
yt_dlp/extractor/condenast.py create mode 100644 yt_dlp/extractor/contv.py create mode 100644 yt_dlp/extractor/corus.py create mode 100644 yt_dlp/extractor/coub.py create mode 100644 yt_dlp/extractor/cozytv.py create mode 100644 yt_dlp/extractor/cpac.py create mode 100644 yt_dlp/extractor/cracked.py create mode 100644 yt_dlp/extractor/crackle.py create mode 100644 yt_dlp/extractor/craftsy.py create mode 100644 yt_dlp/extractor/crooksandliars.py create mode 100644 yt_dlp/extractor/crowdbunker.py create mode 100644 yt_dlp/extractor/crtvg.py create mode 100644 yt_dlp/extractor/crunchyroll.py create mode 100644 yt_dlp/extractor/cspan.py create mode 100644 yt_dlp/extractor/ctsnews.py create mode 100644 yt_dlp/extractor/ctv.py create mode 100644 yt_dlp/extractor/ctvnews.py create mode 100644 yt_dlp/extractor/cultureunplugged.py create mode 100644 yt_dlp/extractor/curiositystream.py create mode 100644 yt_dlp/extractor/cwtv.py create mode 100644 yt_dlp/extractor/cybrary.py create mode 100644 yt_dlp/extractor/dacast.py create mode 100644 yt_dlp/extractor/dailymail.py create mode 100644 yt_dlp/extractor/dailymotion.py create mode 100644 yt_dlp/extractor/dailywire.py create mode 100644 yt_dlp/extractor/damtomo.py create mode 100644 yt_dlp/extractor/daum.py create mode 100644 yt_dlp/extractor/daystar.py create mode 100644 yt_dlp/extractor/dbtv.py create mode 100644 yt_dlp/extractor/dctp.py create mode 100644 yt_dlp/extractor/deezer.py create mode 100644 yt_dlp/extractor/democracynow.py create mode 100644 yt_dlp/extractor/detik.py create mode 100644 yt_dlp/extractor/deuxm.py create mode 100644 yt_dlp/extractor/dfb.py create mode 100644 yt_dlp/extractor/dhm.py create mode 100644 yt_dlp/extractor/digitalconcerthall.py create mode 100644 yt_dlp/extractor/digiteka.py create mode 100644 yt_dlp/extractor/discogs.py create mode 100644 yt_dlp/extractor/discovery.py create mode 100644 yt_dlp/extractor/discoverygo.py create mode 100644 yt_dlp/extractor/disney.py create mode 100644 yt_dlp/extractor/dispeak.py create mode 100644 yt_dlp/extractor/dlf.py create mode 100644 yt_dlp/extractor/dlive.py create mode 100644 yt_dlp/extractor/douyutv.py create mode 100644 yt_dlp/extractor/dplay.py create mode 100644 yt_dlp/extractor/drbonanza.py create mode 100644 yt_dlp/extractor/dreisat.py create mode 100644 yt_dlp/extractor/drooble.py create mode 100644 yt_dlp/extractor/dropbox.py create mode 100644 yt_dlp/extractor/dropout.py create mode 100644 yt_dlp/extractor/drtuber.py create mode 100644 yt_dlp/extractor/drtv.py create mode 100644 yt_dlp/extractor/dtube.py create mode 100644 yt_dlp/extractor/duboku.py create mode 100644 yt_dlp/extractor/dumpert.py create mode 100644 yt_dlp/extractor/duoplay.py create mode 100644 yt_dlp/extractor/dvtv.py create mode 100644 yt_dlp/extractor/dw.py create mode 100644 yt_dlp/extractor/eagleplatform.py create mode 100644 yt_dlp/extractor/ebaumsworld.py create mode 100644 yt_dlp/extractor/ebay.py create mode 100644 yt_dlp/extractor/egghead.py create mode 100644 yt_dlp/extractor/eighttracks.py create mode 100644 yt_dlp/extractor/einthusan.py create mode 100644 yt_dlp/extractor/eitb.py create mode 100644 yt_dlp/extractor/elementorembed.py create mode 100644 yt_dlp/extractor/elonet.py create mode 100644 yt_dlp/extractor/elpais.py create mode 100644 yt_dlp/extractor/eltrecetv.py create mode 100644 yt_dlp/extractor/embedly.py create mode 100644 yt_dlp/extractor/epicon.py create mode 100644 yt_dlp/extractor/epidemicsound.py create mode 100644 yt_dlp/extractor/eplus.py create mode 100644 
yt_dlp/extractor/epoch.py create mode 100644 yt_dlp/extractor/eporner.py create mode 100644 yt_dlp/extractor/erocast.py create mode 100644 yt_dlp/extractor/eroprofile.py create mode 100644 yt_dlp/extractor/err.py create mode 100644 yt_dlp/extractor/ertgr.py create mode 100644 yt_dlp/extractor/espn.py create mode 100644 yt_dlp/extractor/ettutv.py create mode 100644 yt_dlp/extractor/europa.py create mode 100644 yt_dlp/extractor/europeantour.py create mode 100644 yt_dlp/extractor/eurosport.py create mode 100644 yt_dlp/extractor/euscreen.py create mode 100644 yt_dlp/extractor/expressen.py create mode 100644 yt_dlp/extractor/extractors.py create mode 100644 yt_dlp/extractor/eyedotv.py create mode 100644 yt_dlp/extractor/facebook.py create mode 100644 yt_dlp/extractor/fancode.py create mode 100644 yt_dlp/extractor/faz.py create mode 100644 yt_dlp/extractor/fc2.py create mode 100644 yt_dlp/extractor/fczenit.py create mode 100644 yt_dlp/extractor/fifa.py create mode 100644 yt_dlp/extractor/filmon.py create mode 100644 yt_dlp/extractor/filmweb.py create mode 100644 yt_dlp/extractor/firsttv.py create mode 100644 yt_dlp/extractor/fivetv.py create mode 100644 yt_dlp/extractor/flextv.py create mode 100644 yt_dlp/extractor/flickr.py create mode 100644 yt_dlp/extractor/floatplane.py create mode 100644 yt_dlp/extractor/folketinget.py create mode 100644 yt_dlp/extractor/footyroom.py create mode 100644 yt_dlp/extractor/formula1.py create mode 100644 yt_dlp/extractor/fourtube.py create mode 100644 yt_dlp/extractor/fox.py create mode 100644 yt_dlp/extractor/fox9.py create mode 100644 yt_dlp/extractor/foxnews.py create mode 100644 yt_dlp/extractor/foxsports.py create mode 100644 yt_dlp/extractor/fptplay.py create mode 100644 yt_dlp/extractor/franceinter.py create mode 100644 yt_dlp/extractor/francetv.py create mode 100644 yt_dlp/extractor/freesound.py create mode 100644 yt_dlp/extractor/freespeech.py create mode 100644 yt_dlp/extractor/freetv.py create mode 100644 yt_dlp/extractor/frontendmasters.py create mode 100644 yt_dlp/extractor/fujitv.py create mode 100644 yt_dlp/extractor/funimation.py create mode 100644 yt_dlp/extractor/funk.py create mode 100644 yt_dlp/extractor/funker530.py create mode 100644 yt_dlp/extractor/fuyintv.py create mode 100644 yt_dlp/extractor/gab.py create mode 100644 yt_dlp/extractor/gaia.py create mode 100644 yt_dlp/extractor/gamejolt.py create mode 100644 yt_dlp/extractor/gamespot.py create mode 100644 yt_dlp/extractor/gamestar.py create mode 100644 yt_dlp/extractor/gaskrank.py create mode 100644 yt_dlp/extractor/gazeta.py create mode 100644 yt_dlp/extractor/gdcvault.py create mode 100644 yt_dlp/extractor/gedidigital.py create mode 100644 yt_dlp/extractor/generic.py create mode 100644 yt_dlp/extractor/genericembeds.py create mode 100644 yt_dlp/extractor/genius.py create mode 100644 yt_dlp/extractor/getcourseru.py create mode 100644 yt_dlp/extractor/gettr.py create mode 100644 yt_dlp/extractor/giantbomb.py create mode 100644 yt_dlp/extractor/gigya.py create mode 100644 yt_dlp/extractor/glide.py create mode 100644 yt_dlp/extractor/globalplayer.py create mode 100644 yt_dlp/extractor/globo.py create mode 100644 yt_dlp/extractor/glomex.py create mode 100644 yt_dlp/extractor/gmanetwork.py create mode 100644 yt_dlp/extractor/go.py create mode 100644 yt_dlp/extractor/godtube.py create mode 100644 yt_dlp/extractor/gofile.py create mode 100644 yt_dlp/extractor/golem.py create mode 100644 yt_dlp/extractor/goodgame.py create mode 100644 yt_dlp/extractor/googledrive.py create mode 100644 
yt_dlp/extractor/googlepodcasts.py create mode 100644 yt_dlp/extractor/googlesearch.py create mode 100644 yt_dlp/extractor/goplay.py create mode 100644 yt_dlp/extractor/gopro.py create mode 100644 yt_dlp/extractor/goshgay.py create mode 100644 yt_dlp/extractor/gotostage.py create mode 100644 yt_dlp/extractor/gputechconf.py create mode 100644 yt_dlp/extractor/gronkh.py create mode 100644 yt_dlp/extractor/groupon.py create mode 100644 yt_dlp/extractor/harpodeon.py create mode 100644 yt_dlp/extractor/hbo.py create mode 100644 yt_dlp/extractor/hearthisat.py create mode 100644 yt_dlp/extractor/heise.py create mode 100644 yt_dlp/extractor/hellporno.py create mode 100644 yt_dlp/extractor/hgtv.py create mode 100644 yt_dlp/extractor/hidive.py create mode 100644 yt_dlp/extractor/historicfilms.py create mode 100644 yt_dlp/extractor/hitrecord.py create mode 100644 yt_dlp/extractor/hketv.py create mode 100644 yt_dlp/extractor/hollywoodreporter.py create mode 100644 yt_dlp/extractor/holodex.py create mode 100644 yt_dlp/extractor/hotnewhiphop.py create mode 100644 yt_dlp/extractor/hotstar.py create mode 100644 yt_dlp/extractor/hrefli.py create mode 100644 yt_dlp/extractor/hrfensehen.py create mode 100644 yt_dlp/extractor/hrti.py create mode 100644 yt_dlp/extractor/hse.py create mode 100644 yt_dlp/extractor/huajiao.py create mode 100644 yt_dlp/extractor/huffpost.py create mode 100644 yt_dlp/extractor/hungama.py create mode 100644 yt_dlp/extractor/huya.py create mode 100644 yt_dlp/extractor/hypem.py create mode 100644 yt_dlp/extractor/hypergryph.py create mode 100644 yt_dlp/extractor/hytale.py create mode 100644 yt_dlp/extractor/icareus.py create mode 100644 yt_dlp/extractor/ichinanalive.py create mode 100644 yt_dlp/extractor/idolplus.py create mode 100644 yt_dlp/extractor/ign.py create mode 100644 yt_dlp/extractor/iheart.py create mode 100644 yt_dlp/extractor/ilpost.py create mode 100644 yt_dlp/extractor/iltalehti.py create mode 100644 yt_dlp/extractor/imdb.py create mode 100644 yt_dlp/extractor/imggaming.py create mode 100644 yt_dlp/extractor/imgur.py create mode 100644 yt_dlp/extractor/ina.py create mode 100644 yt_dlp/extractor/inc.py create mode 100644 yt_dlp/extractor/indavideo.py create mode 100644 yt_dlp/extractor/infoq.py create mode 100644 yt_dlp/extractor/instagram.py create mode 100644 yt_dlp/extractor/internazionale.py create mode 100644 yt_dlp/extractor/internetvideoarchive.py create mode 100644 yt_dlp/extractor/iprima.py create mode 100644 yt_dlp/extractor/iqiyi.py create mode 100644 yt_dlp/extractor/islamchannel.py create mode 100644 yt_dlp/extractor/israelnationalnews.py create mode 100644 yt_dlp/extractor/itprotv.py create mode 100644 yt_dlp/extractor/itv.py create mode 100644 yt_dlp/extractor/ivi.py create mode 100644 yt_dlp/extractor/ivideon.py create mode 100644 yt_dlp/extractor/iwara.py create mode 100644 yt_dlp/extractor/ixigua.py create mode 100644 yt_dlp/extractor/izlesene.py create mode 100644 yt_dlp/extractor/jable.py create mode 100644 yt_dlp/extractor/jamendo.py create mode 100644 yt_dlp/extractor/japandiet.py create mode 100644 yt_dlp/extractor/jeuxvideo.py create mode 100644 yt_dlp/extractor/jiosaavn.py create mode 100644 yt_dlp/extractor/jixie.py create mode 100644 yt_dlp/extractor/joj.py create mode 100644 yt_dlp/extractor/joqrag.py create mode 100644 yt_dlp/extractor/jove.py create mode 100644 yt_dlp/extractor/jstream.py create mode 100644 yt_dlp/extractor/jtbc.py create mode 100644 yt_dlp/extractor/jwplatform.py create mode 100644 yt_dlp/extractor/kakao.py create mode 
100644 yt_dlp/extractor/kaltura.py create mode 100644 yt_dlp/extractor/kankanews.py create mode 100644 yt_dlp/extractor/karaoketv.py create mode 100644 yt_dlp/extractor/kelbyone.py create mode 100644 yt_dlp/extractor/khanacademy.py create mode 100644 yt_dlp/extractor/kick.py create mode 100644 yt_dlp/extractor/kicker.py create mode 100644 yt_dlp/extractor/kickstarter.py create mode 100644 yt_dlp/extractor/kinja.py create mode 100644 yt_dlp/extractor/kinopoisk.py create mode 100644 yt_dlp/extractor/kommunetv.py create mode 100644 yt_dlp/extractor/kompas.py create mode 100644 yt_dlp/extractor/koo.py create mode 100644 yt_dlp/extractor/krasview.py create mode 100644 yt_dlp/extractor/kth.py create mode 100644 yt_dlp/extractor/ku6.py create mode 100644 yt_dlp/extractor/kukululive.py create mode 100644 yt_dlp/extractor/kuwo.py create mode 100644 yt_dlp/extractor/la7.py create mode 100644 yt_dlp/extractor/lastfm.py create mode 100644 yt_dlp/extractor/laxarxames.py create mode 100644 yt_dlp/extractor/lbry.py create mode 100644 yt_dlp/extractor/lci.py create mode 100644 yt_dlp/extractor/lcp.py create mode 100644 yt_dlp/extractor/lecture2go.py create mode 100644 yt_dlp/extractor/lecturio.py create mode 100644 yt_dlp/extractor/leeco.py create mode 100644 yt_dlp/extractor/lefigaro.py create mode 100644 yt_dlp/extractor/lego.py create mode 100644 yt_dlp/extractor/lemonde.py create mode 100644 yt_dlp/extractor/lenta.py create mode 100644 yt_dlp/extractor/libraryofcongress.py create mode 100644 yt_dlp/extractor/libsyn.py create mode 100644 yt_dlp/extractor/lifenews.py create mode 100644 yt_dlp/extractor/likee.py create mode 100644 yt_dlp/extractor/limelight.py create mode 100644 yt_dlp/extractor/linkedin.py create mode 100644 yt_dlp/extractor/liputan6.py create mode 100644 yt_dlp/extractor/listennotes.py create mode 100644 yt_dlp/extractor/litv.py create mode 100644 yt_dlp/extractor/livejournal.py create mode 100644 yt_dlp/extractor/livestream.py create mode 100644 yt_dlp/extractor/livestreamfails.py create mode 100644 yt_dlp/extractor/lnkgo.py create mode 100644 yt_dlp/extractor/lovehomeporn.py create mode 100644 yt_dlp/extractor/lrt.py create mode 100644 yt_dlp/extractor/lsm.py create mode 100644 yt_dlp/extractor/lumni.py create mode 100644 yt_dlp/extractor/lynda.py create mode 100644 yt_dlp/extractor/maariv.py create mode 100644 yt_dlp/extractor/magellantv.py create mode 100644 yt_dlp/extractor/magentamusik.py create mode 100644 yt_dlp/extractor/mailru.py create mode 100644 yt_dlp/extractor/mainstreaming.py create mode 100644 yt_dlp/extractor/mangomolo.py create mode 100644 yt_dlp/extractor/manoto.py create mode 100644 yt_dlp/extractor/manyvids.py create mode 100644 yt_dlp/extractor/maoritv.py create mode 100644 yt_dlp/extractor/markiza.py create mode 100644 yt_dlp/extractor/massengeschmacktv.py create mode 100644 yt_dlp/extractor/masters.py create mode 100644 yt_dlp/extractor/matchtv.py create mode 100644 yt_dlp/extractor/mbn.py create mode 100644 yt_dlp/extractor/mdr.py create mode 100644 yt_dlp/extractor/medaltv.py create mode 100644 yt_dlp/extractor/mediaite.py create mode 100644 yt_dlp/extractor/mediaklikk.py create mode 100644 yt_dlp/extractor/medialaan.py create mode 100644 yt_dlp/extractor/mediaset.py create mode 100644 yt_dlp/extractor/mediasite.py create mode 100644 yt_dlp/extractor/mediastream.py create mode 100644 yt_dlp/extractor/mediaworksnz.py create mode 100644 yt_dlp/extractor/medici.py create mode 100644 yt_dlp/extractor/megaphone.py create mode 100644 yt_dlp/extractor/megatvcom.py 
create mode 100644 yt_dlp/extractor/meipai.py create mode 100644 yt_dlp/extractor/melonvod.py create mode 100644 yt_dlp/extractor/metacritic.py create mode 100644 yt_dlp/extractor/mgtv.py create mode 100644 yt_dlp/extractor/microsoftembed.py create mode 100644 yt_dlp/extractor/microsoftstream.py create mode 100644 yt_dlp/extractor/microsoftvirtualacademy.py create mode 100644 yt_dlp/extractor/mildom.py create mode 100644 yt_dlp/extractor/minds.py create mode 100644 yt_dlp/extractor/minoto.py create mode 100644 yt_dlp/extractor/mirrativ.py create mode 100644 yt_dlp/extractor/mirrorcouk.py create mode 100644 yt_dlp/extractor/mit.py create mode 100644 yt_dlp/extractor/mitele.py create mode 100644 yt_dlp/extractor/mixch.py create mode 100644 yt_dlp/extractor/mixcloud.py create mode 100644 yt_dlp/extractor/mlb.py create mode 100644 yt_dlp/extractor/mlssoccer.py create mode 100644 yt_dlp/extractor/mocha.py create mode 100644 yt_dlp/extractor/mojvideo.py create mode 100644 yt_dlp/extractor/monstercat.py create mode 100644 yt_dlp/extractor/motherless.py create mode 100644 yt_dlp/extractor/motorsport.py create mode 100644 yt_dlp/extractor/moviepilot.py create mode 100644 yt_dlp/extractor/moview.py create mode 100644 yt_dlp/extractor/moviezine.py create mode 100644 yt_dlp/extractor/movingimage.py create mode 100644 yt_dlp/extractor/msn.py create mode 100644 yt_dlp/extractor/mtv.py create mode 100644 yt_dlp/extractor/muenchentv.py create mode 100644 yt_dlp/extractor/murrtube.py create mode 100644 yt_dlp/extractor/museai.py create mode 100644 yt_dlp/extractor/musescore.py create mode 100644 yt_dlp/extractor/musicdex.py create mode 100644 yt_dlp/extractor/mx3.py create mode 100644 yt_dlp/extractor/mxplayer.py create mode 100644 yt_dlp/extractor/myspace.py create mode 100644 yt_dlp/extractor/myspass.py create mode 100644 yt_dlp/extractor/myvideoge.py create mode 100644 yt_dlp/extractor/myvidster.py create mode 100644 yt_dlp/extractor/mzaalo.py create mode 100644 yt_dlp/extractor/n1.py create mode 100644 yt_dlp/extractor/nate.py create mode 100644 yt_dlp/extractor/nationalgeographic.py create mode 100644 yt_dlp/extractor/naver.py create mode 100644 yt_dlp/extractor/nba.py create mode 100644 yt_dlp/extractor/nbc.py create mode 100644 yt_dlp/extractor/ndr.py create mode 100644 yt_dlp/extractor/ndtv.py create mode 100644 yt_dlp/extractor/nebula.py create mode 100644 yt_dlp/extractor/nekohacker.py create mode 100644 yt_dlp/extractor/nerdcubed.py create mode 100644 yt_dlp/extractor/neteasemusic.py create mode 100644 yt_dlp/extractor/netverse.py create mode 100644 yt_dlp/extractor/netzkino.py create mode 100644 yt_dlp/extractor/newgrounds.py create mode 100644 yt_dlp/extractor/newspicks.py create mode 100644 yt_dlp/extractor/newsy.py create mode 100644 yt_dlp/extractor/nextmedia.py create mode 100644 yt_dlp/extractor/nexx.py create mode 100644 yt_dlp/extractor/nfb.py create mode 100644 yt_dlp/extractor/nfhsnetwork.py create mode 100644 yt_dlp/extractor/nfl.py create mode 100644 yt_dlp/extractor/nhk.py create mode 100644 yt_dlp/extractor/nhl.py create mode 100644 yt_dlp/extractor/nick.py create mode 100644 yt_dlp/extractor/niconico.py create mode 100644 yt_dlp/extractor/niconicochannelplus.py create mode 100644 yt_dlp/extractor/ninaprotocol.py create mode 100644 yt_dlp/extractor/ninecninemedia.py create mode 100644 yt_dlp/extractor/ninegag.py create mode 100644 yt_dlp/extractor/ninenews.py create mode 100644 yt_dlp/extractor/ninenow.py create mode 100644 yt_dlp/extractor/nintendo.py create mode 100644 
yt_dlp/extractor/nitter.py create mode 100644 yt_dlp/extractor/nobelprize.py create mode 100644 yt_dlp/extractor/noice.py create mode 100644 yt_dlp/extractor/nonktube.py create mode 100644 yt_dlp/extractor/noodlemagazine.py create mode 100644 yt_dlp/extractor/noovo.py create mode 100644 yt_dlp/extractor/nosnl.py create mode 100644 yt_dlp/extractor/nova.py create mode 100644 yt_dlp/extractor/novaplay.py create mode 100644 yt_dlp/extractor/nowness.py create mode 100644 yt_dlp/extractor/noz.py create mode 100644 yt_dlp/extractor/npo.py create mode 100644 yt_dlp/extractor/npr.py create mode 100644 yt_dlp/extractor/nrk.py create mode 100644 yt_dlp/extractor/nrl.py create mode 100644 yt_dlp/extractor/ntvcojp.py create mode 100644 yt_dlp/extractor/ntvde.py create mode 100644 yt_dlp/extractor/ntvru.py create mode 100644 yt_dlp/extractor/nubilesporn.py create mode 100644 yt_dlp/extractor/nuevo.py create mode 100644 yt_dlp/extractor/nuum.py create mode 100644 yt_dlp/extractor/nuvid.py create mode 100644 yt_dlp/extractor/nytimes.py create mode 100644 yt_dlp/extractor/nzherald.py create mode 100644 yt_dlp/extractor/nzonscreen.py create mode 100644 yt_dlp/extractor/nzz.py create mode 100644 yt_dlp/extractor/odkmedia.py create mode 100644 yt_dlp/extractor/odnoklassniki.py create mode 100644 yt_dlp/extractor/oftv.py create mode 100644 yt_dlp/extractor/oktoberfesttv.py create mode 100644 yt_dlp/extractor/olympics.py create mode 100644 yt_dlp/extractor/on24.py create mode 100644 yt_dlp/extractor/once.py create mode 100644 yt_dlp/extractor/ondemandkorea.py create mode 100644 yt_dlp/extractor/onefootball.py create mode 100644 yt_dlp/extractor/onenewsnz.py create mode 100644 yt_dlp/extractor/oneplace.py create mode 100644 yt_dlp/extractor/onet.py create mode 100644 yt_dlp/extractor/onionstudios.py create mode 100644 yt_dlp/extractor/opencast.py create mode 100644 yt_dlp/extractor/openload.py create mode 100644 yt_dlp/extractor/openrec.py create mode 100644 yt_dlp/extractor/ora.py create mode 100644 yt_dlp/extractor/orf.py create mode 100644 yt_dlp/extractor/outsidetv.py create mode 100644 yt_dlp/extractor/owncloud.py create mode 100644 yt_dlp/extractor/packtpub.py create mode 100644 yt_dlp/extractor/palcomp3.py create mode 100644 yt_dlp/extractor/panopto.py create mode 100644 yt_dlp/extractor/paramountplus.py create mode 100644 yt_dlp/extractor/parler.py create mode 100644 yt_dlp/extractor/parlview.py create mode 100644 yt_dlp/extractor/patreon.py create mode 100644 yt_dlp/extractor/pbs.py create mode 100644 yt_dlp/extractor/pearvideo.py create mode 100644 yt_dlp/extractor/peekvids.py create mode 100644 yt_dlp/extractor/peertube.py create mode 100644 yt_dlp/extractor/peertv.py create mode 100644 yt_dlp/extractor/peloton.py create mode 100644 yt_dlp/extractor/performgroup.py create mode 100644 yt_dlp/extractor/periscope.py create mode 100644 yt_dlp/extractor/pgatour.py create mode 100644 yt_dlp/extractor/philharmoniedeparis.py create mode 100644 yt_dlp/extractor/phoenix.py create mode 100644 yt_dlp/extractor/photobucket.py create mode 100644 yt_dlp/extractor/piapro.py create mode 100644 yt_dlp/extractor/piaulizaportal.py create mode 100644 yt_dlp/extractor/picarto.py create mode 100644 yt_dlp/extractor/piksel.py create mode 100644 yt_dlp/extractor/pinkbike.py create mode 100644 yt_dlp/extractor/pinterest.py create mode 100644 yt_dlp/extractor/pixivsketch.py create mode 100644 yt_dlp/extractor/pladform.py create mode 100644 yt_dlp/extractor/planetmarathi.py create mode 100644 yt_dlp/extractor/platzi.py create 
mode 100644 yt_dlp/extractor/playplustv.py create mode 100644 yt_dlp/extractor/playsuisse.py create mode 100644 yt_dlp/extractor/playtvak.py create mode 100644 yt_dlp/extractor/playwire.py create mode 100644 yt_dlp/extractor/pluralsight.py create mode 100644 yt_dlp/extractor/plutotv.py create mode 100644 yt_dlp/extractor/podbayfm.py create mode 100644 yt_dlp/extractor/podchaser.py create mode 100644 yt_dlp/extractor/podomatic.py create mode 100644 yt_dlp/extractor/pokemon.py create mode 100644 yt_dlp/extractor/pokergo.py create mode 100644 yt_dlp/extractor/polsatgo.py create mode 100644 yt_dlp/extractor/polskieradio.py create mode 100644 yt_dlp/extractor/popcorntimes.py create mode 100644 yt_dlp/extractor/popcorntv.py create mode 100644 yt_dlp/extractor/porn91.py create mode 100644 yt_dlp/extractor/pornbox.py create mode 100644 yt_dlp/extractor/pornflip.py create mode 100644 yt_dlp/extractor/pornhub.py create mode 100644 yt_dlp/extractor/pornotube.py create mode 100644 yt_dlp/extractor/pornovoisines.py create mode 100644 yt_dlp/extractor/pornoxo.py create mode 100644 yt_dlp/extractor/pr0gramm.py create mode 100644 yt_dlp/extractor/prankcast.py create mode 100644 yt_dlp/extractor/premiershiprugby.py create mode 100644 yt_dlp/extractor/presstv.py create mode 100644 yt_dlp/extractor/projectveritas.py create mode 100644 yt_dlp/extractor/prosiebensat1.py create mode 100644 yt_dlp/extractor/prx.py create mode 100644 yt_dlp/extractor/puhutv.py create mode 100644 yt_dlp/extractor/puls4.py create mode 100644 yt_dlp/extractor/pyvideo.py create mode 100644 yt_dlp/extractor/qdance.py create mode 100644 yt_dlp/extractor/qingting.py create mode 100644 yt_dlp/extractor/qqmusic.py create mode 100644 yt_dlp/extractor/r7.py create mode 100644 yt_dlp/extractor/radiko.py create mode 100644 yt_dlp/extractor/radiocanada.py create mode 100644 yt_dlp/extractor/radiocomercial.py create mode 100644 yt_dlp/extractor/radiode.py create mode 100644 yt_dlp/extractor/radiofrance.py create mode 100644 yt_dlp/extractor/radiojavan.py create mode 100644 yt_dlp/extractor/radiokapital.py create mode 100644 yt_dlp/extractor/radiozet.py create mode 100644 yt_dlp/extractor/radlive.py create mode 100644 yt_dlp/extractor/rai.py create mode 100644 yt_dlp/extractor/raywenderlich.py create mode 100644 yt_dlp/extractor/rbgtum.py create mode 100644 yt_dlp/extractor/rcs.py create mode 100644 yt_dlp/extractor/rcti.py create mode 100644 yt_dlp/extractor/rds.py create mode 100644 yt_dlp/extractor/redbee.py create mode 100644 yt_dlp/extractor/redbulltv.py create mode 100644 yt_dlp/extractor/reddit.py create mode 100644 yt_dlp/extractor/redge.py create mode 100644 yt_dlp/extractor/redgifs.py create mode 100644 yt_dlp/extractor/redtube.py create mode 100644 yt_dlp/extractor/rentv.py create mode 100644 yt_dlp/extractor/restudy.py create mode 100644 yt_dlp/extractor/reuters.py create mode 100644 yt_dlp/extractor/reverbnation.py create mode 100644 yt_dlp/extractor/rheinmaintv.py create mode 100644 yt_dlp/extractor/ridehome.py create mode 100644 yt_dlp/extractor/rinsefm.py create mode 100644 yt_dlp/extractor/rmcdecouverte.py create mode 100644 yt_dlp/extractor/rockstargames.py create mode 100644 yt_dlp/extractor/rokfin.py create mode 100644 yt_dlp/extractor/roosterteeth.py create mode 100644 yt_dlp/extractor/rottentomatoes.py create mode 100644 yt_dlp/extractor/rozhlas.py create mode 100644 yt_dlp/extractor/rte.py create mode 100644 yt_dlp/extractor/rtl2.py create mode 100644 yt_dlp/extractor/rtlnl.py create mode 100644 yt_dlp/extractor/rtnews.py 
create mode 100644 yt_dlp/extractor/rtp.py create mode 100644 yt_dlp/extractor/rtrfm.py create mode 100644 yt_dlp/extractor/rts.py create mode 100644 yt_dlp/extractor/rtvcplay.py create mode 100644 yt_dlp/extractor/rtve.py create mode 100644 yt_dlp/extractor/rtvs.py create mode 100644 yt_dlp/extractor/rtvslo.py create mode 100644 yt_dlp/extractor/rudovideo.py create mode 100644 yt_dlp/extractor/rule34video.py create mode 100644 yt_dlp/extractor/rumble.py create mode 100644 yt_dlp/extractor/rutube.py create mode 100644 yt_dlp/extractor/rutv.py create mode 100644 yt_dlp/extractor/ruutu.py create mode 100644 yt_dlp/extractor/ruv.py create mode 100644 yt_dlp/extractor/s4c.py create mode 100644 yt_dlp/extractor/safari.py create mode 100644 yt_dlp/extractor/saitosan.py create mode 100644 yt_dlp/extractor/samplefocus.py create mode 100644 yt_dlp/extractor/sapo.py create mode 100644 yt_dlp/extractor/sbs.py create mode 100644 yt_dlp/extractor/sbscokr.py create mode 100644 yt_dlp/extractor/screen9.py create mode 100644 yt_dlp/extractor/screencast.py create mode 100644 yt_dlp/extractor/screencastify.py create mode 100644 yt_dlp/extractor/screencastomatic.py create mode 100644 yt_dlp/extractor/scrippsnetworks.py create mode 100644 yt_dlp/extractor/scrolller.py create mode 100644 yt_dlp/extractor/scte.py create mode 100644 yt_dlp/extractor/sejmpl.py create mode 100644 yt_dlp/extractor/senalcolombia.py create mode 100644 yt_dlp/extractor/senategov.py create mode 100644 yt_dlp/extractor/sendtonews.py create mode 100644 yt_dlp/extractor/servus.py create mode 100644 yt_dlp/extractor/sevenplus.py create mode 100644 yt_dlp/extractor/sexu.py create mode 100644 yt_dlp/extractor/seznamzpravy.py create mode 100644 yt_dlp/extractor/shahid.py create mode 100644 yt_dlp/extractor/sharevideos.py create mode 100644 yt_dlp/extractor/shemaroome.py create mode 100644 yt_dlp/extractor/showroomlive.py create mode 100644 yt_dlp/extractor/sibnet.py create mode 100644 yt_dlp/extractor/simplecast.py create mode 100644 yt_dlp/extractor/sina.py create mode 100644 yt_dlp/extractor/sixplay.py create mode 100644 yt_dlp/extractor/skeb.py create mode 100644 yt_dlp/extractor/sky.py create mode 100644 yt_dlp/extractor/skyit.py create mode 100644 yt_dlp/extractor/skylinewebcams.py create mode 100644 yt_dlp/extractor/skynewsarabia.py create mode 100644 yt_dlp/extractor/skynewsau.py create mode 100644 yt_dlp/extractor/slideshare.py create mode 100644 yt_dlp/extractor/slideslive.py create mode 100644 yt_dlp/extractor/slutload.py create mode 100644 yt_dlp/extractor/smotrim.py create mode 100644 yt_dlp/extractor/snotr.py create mode 100644 yt_dlp/extractor/sohu.py create mode 100644 yt_dlp/extractor/sonyliv.py create mode 100644 yt_dlp/extractor/soundcloud.py create mode 100644 yt_dlp/extractor/soundgasm.py create mode 100644 yt_dlp/extractor/southpark.py create mode 100644 yt_dlp/extractor/sovietscloset.py create mode 100644 yt_dlp/extractor/spankbang.py create mode 100644 yt_dlp/extractor/spiegel.py create mode 100644 yt_dlp/extractor/spike.py create mode 100644 yt_dlp/extractor/sport5.py create mode 100644 yt_dlp/extractor/sportbox.py create mode 100644 yt_dlp/extractor/sportdeutschland.py create mode 100644 yt_dlp/extractor/spotify.py create mode 100644 yt_dlp/extractor/spreaker.py create mode 100644 yt_dlp/extractor/springboardplatform.py create mode 100644 yt_dlp/extractor/sprout.py create mode 100644 yt_dlp/extractor/srgssr.py create mode 100644 yt_dlp/extractor/srmediathek.py create mode 100644 yt_dlp/extractor/stacommu.py create 
mode 100644 yt_dlp/extractor/stageplus.py create mode 100644 yt_dlp/extractor/stanfordoc.py create mode 100644 yt_dlp/extractor/startrek.py create mode 100644 yt_dlp/extractor/startv.py create mode 100644 yt_dlp/extractor/steam.py create mode 100644 yt_dlp/extractor/stitcher.py create mode 100644 yt_dlp/extractor/storyfire.py create mode 100644 yt_dlp/extractor/streamable.py create mode 100644 yt_dlp/extractor/streamcz.py create mode 100644 yt_dlp/extractor/streetvoice.py create mode 100644 yt_dlp/extractor/stretchinternet.py create mode 100644 yt_dlp/extractor/stripchat.py create mode 100644 yt_dlp/extractor/stv.py create mode 100644 yt_dlp/extractor/substack.py create mode 100644 yt_dlp/extractor/sunporno.py create mode 100644 yt_dlp/extractor/sverigesradio.py create mode 100644 yt_dlp/extractor/svt.py create mode 100644 yt_dlp/extractor/swearnet.py create mode 100644 yt_dlp/extractor/syfy.py create mode 100644 yt_dlp/extractor/syvdk.py create mode 100644 yt_dlp/extractor/sztvhu.py create mode 100644 yt_dlp/extractor/tagesschau.py create mode 100644 yt_dlp/extractor/tass.py create mode 100644 yt_dlp/extractor/tbs.py create mode 100644 yt_dlp/extractor/tbsjp.py create mode 100644 yt_dlp/extractor/teachable.py create mode 100644 yt_dlp/extractor/teachertube.py create mode 100644 yt_dlp/extractor/teachingchannel.py create mode 100644 yt_dlp/extractor/teamcoco.py create mode 100644 yt_dlp/extractor/teamtreehouse.py create mode 100644 yt_dlp/extractor/ted.py create mode 100644 yt_dlp/extractor/tele13.py create mode 100644 yt_dlp/extractor/tele5.py create mode 100644 yt_dlp/extractor/telebruxelles.py create mode 100644 yt_dlp/extractor/telecaribe.py create mode 100644 yt_dlp/extractor/telecinco.py create mode 100644 yt_dlp/extractor/telegraaf.py create mode 100644 yt_dlp/extractor/telegram.py create mode 100644 yt_dlp/extractor/telemb.py create mode 100644 yt_dlp/extractor/telemundo.py create mode 100644 yt_dlp/extractor/telequebec.py create mode 100644 yt_dlp/extractor/teletask.py create mode 100644 yt_dlp/extractor/telewebion.py create mode 100644 yt_dlp/extractor/tempo.py create mode 100644 yt_dlp/extractor/tencent.py create mode 100644 yt_dlp/extractor/tennistv.py create mode 100644 yt_dlp/extractor/tenplay.py create mode 100644 yt_dlp/extractor/testurl.py create mode 100644 yt_dlp/extractor/tf1.py create mode 100644 yt_dlp/extractor/tfo.py create mode 100644 yt_dlp/extractor/theguardian.py create mode 100644 yt_dlp/extractor/theholetv.py create mode 100644 yt_dlp/extractor/theintercept.py create mode 100644 yt_dlp/extractor/theplatform.py create mode 100644 yt_dlp/extractor/thestar.py create mode 100644 yt_dlp/extractor/thesun.py create mode 100644 yt_dlp/extractor/theweatherchannel.py create mode 100644 yt_dlp/extractor/thisamericanlife.py create mode 100644 yt_dlp/extractor/thisoldhouse.py create mode 100644 yt_dlp/extractor/thisvid.py create mode 100644 yt_dlp/extractor/threeqsdn.py create mode 100644 yt_dlp/extractor/threespeak.py create mode 100644 yt_dlp/extractor/tiktok.py create mode 100644 yt_dlp/extractor/tmz.py create mode 100644 yt_dlp/extractor/tnaflix.py create mode 100644 yt_dlp/extractor/toggle.py create mode 100644 yt_dlp/extractor/toggo.py create mode 100644 yt_dlp/extractor/tonline.py create mode 100644 yt_dlp/extractor/toongoggles.py create mode 100644 yt_dlp/extractor/toutv.py create mode 100644 yt_dlp/extractor/toypics.py create mode 100644 yt_dlp/extractor/traileraddict.py create mode 100644 yt_dlp/extractor/triller.py create mode 100644 yt_dlp/extractor/trovo.py 
create mode 100644 yt_dlp/extractor/trtcocuk.py create mode 100644 yt_dlp/extractor/trtworld.py create mode 100644 yt_dlp/extractor/trueid.py create mode 100644 yt_dlp/extractor/trunews.py create mode 100644 yt_dlp/extractor/truth.py create mode 100644 yt_dlp/extractor/trutv.py create mode 100644 yt_dlp/extractor/tube8.py create mode 100644 yt_dlp/extractor/tubetugraz.py create mode 100644 yt_dlp/extractor/tubitv.py create mode 100644 yt_dlp/extractor/tumblr.py create mode 100644 yt_dlp/extractor/tunein.py create mode 100644 yt_dlp/extractor/turner.py create mode 100644 yt_dlp/extractor/tv2.py create mode 100644 yt_dlp/extractor/tv24ua.py create mode 100644 yt_dlp/extractor/tv2dk.py create mode 100644 yt_dlp/extractor/tv2hu.py create mode 100644 yt_dlp/extractor/tv4.py create mode 100644 yt_dlp/extractor/tv5mondeplus.py create mode 100644 yt_dlp/extractor/tv5unis.py create mode 100644 yt_dlp/extractor/tva.py create mode 100644 yt_dlp/extractor/tvanouvelles.py create mode 100644 yt_dlp/extractor/tvc.py create mode 100644 yt_dlp/extractor/tver.py create mode 100644 yt_dlp/extractor/tvigle.py create mode 100644 yt_dlp/extractor/tviplayer.py create mode 100644 yt_dlp/extractor/tvland.py create mode 100644 yt_dlp/extractor/tvn24.py create mode 100644 yt_dlp/extractor/tvnoe.py create mode 100644 yt_dlp/extractor/tvopengr.py create mode 100644 yt_dlp/extractor/tvp.py create mode 100644 yt_dlp/extractor/tvplay.py create mode 100644 yt_dlp/extractor/tvplayer.py create mode 100644 yt_dlp/extractor/tweakers.py create mode 100644 yt_dlp/extractor/twentymin.py create mode 100644 yt_dlp/extractor/twentythreevideo.py create mode 100644 yt_dlp/extractor/twitcasting.py create mode 100644 yt_dlp/extractor/twitch.py create mode 100644 yt_dlp/extractor/twitter.py create mode 100644 yt_dlp/extractor/txxx.py create mode 100644 yt_dlp/extractor/udemy.py create mode 100644 yt_dlp/extractor/udn.py create mode 100644 yt_dlp/extractor/ufctv.py create mode 100644 yt_dlp/extractor/ukcolumn.py create mode 100644 yt_dlp/extractor/uktvplay.py create mode 100644 yt_dlp/extractor/umg.py create mode 100644 yt_dlp/extractor/unistra.py create mode 100644 yt_dlp/extractor/unity.py create mode 100644 yt_dlp/extractor/unsupported.py create mode 100644 yt_dlp/extractor/uol.py create mode 100644 yt_dlp/extractor/uplynk.py create mode 100644 yt_dlp/extractor/urort.py create mode 100644 yt_dlp/extractor/urplay.py create mode 100644 yt_dlp/extractor/usanetwork.py create mode 100644 yt_dlp/extractor/usatoday.py create mode 100644 yt_dlp/extractor/ustream.py create mode 100644 yt_dlp/extractor/ustudio.py create mode 100644 yt_dlp/extractor/utreon.py create mode 100644 yt_dlp/extractor/varzesh3.py create mode 100644 yt_dlp/extractor/vbox7.py create mode 100644 yt_dlp/extractor/veo.py create mode 100644 yt_dlp/extractor/veoh.py create mode 100644 yt_dlp/extractor/vesti.py create mode 100644 yt_dlp/extractor/vevo.py create mode 100644 yt_dlp/extractor/vgtv.py create mode 100644 yt_dlp/extractor/vh1.py create mode 100644 yt_dlp/extractor/vice.py create mode 100644 yt_dlp/extractor/viddler.py create mode 100644 yt_dlp/extractor/videa.py create mode 100644 yt_dlp/extractor/videocampus_sachsen.py create mode 100644 yt_dlp/extractor/videodetective.py create mode 100644 yt_dlp/extractor/videofyme.py create mode 100644 yt_dlp/extractor/videoken.py create mode 100644 yt_dlp/extractor/videomore.py create mode 100644 yt_dlp/extractor/videopress.py create mode 100644 yt_dlp/extractor/vidio.py create mode 100644 yt_dlp/extractor/vidlii.py create mode 
100644 yt_dlp/extractor/vidly.py create mode 100644 yt_dlp/extractor/viewlift.py create mode 100644 yt_dlp/extractor/viidea.py create mode 100644 yt_dlp/extractor/viki.py create mode 100644 yt_dlp/extractor/vimeo.py create mode 100644 yt_dlp/extractor/vimm.py create mode 100644 yt_dlp/extractor/vine.py create mode 100644 yt_dlp/extractor/viously.py create mode 100644 yt_dlp/extractor/viqeo.py create mode 100644 yt_dlp/extractor/viu.py create mode 100644 yt_dlp/extractor/vk.py create mode 100644 yt_dlp/extractor/vocaroo.py create mode 100644 yt_dlp/extractor/vodpl.py create mode 100644 yt_dlp/extractor/vodplatform.py create mode 100644 yt_dlp/extractor/voicy.py create mode 100644 yt_dlp/extractor/volejtv.py create mode 100644 yt_dlp/extractor/voot.py create mode 100644 yt_dlp/extractor/voxmedia.py create mode 100644 yt_dlp/extractor/vrt.py create mode 100644 yt_dlp/extractor/vtm.py create mode 100644 yt_dlp/extractor/vuclip.py create mode 100644 yt_dlp/extractor/vvvvid.py create mode 100644 yt_dlp/extractor/walla.py create mode 100644 yt_dlp/extractor/washingtonpost.py create mode 100644 yt_dlp/extractor/wat.py create mode 100644 yt_dlp/extractor/wdr.py create mode 100644 yt_dlp/extractor/webcamerapl.py create mode 100644 yt_dlp/extractor/webcaster.py create mode 100644 yt_dlp/extractor/webofstories.py create mode 100644 yt_dlp/extractor/weibo.py create mode 100644 yt_dlp/extractor/weiqitv.py create mode 100644 yt_dlp/extractor/weverse.py create mode 100644 yt_dlp/extractor/wevidi.py create mode 100644 yt_dlp/extractor/weyyak.py create mode 100644 yt_dlp/extractor/whowatch.py create mode 100644 yt_dlp/extractor/whyp.py create mode 100644 yt_dlp/extractor/wikimedia.py create mode 100644 yt_dlp/extractor/wimbledon.py create mode 100644 yt_dlp/extractor/wimtv.py create mode 100644 yt_dlp/extractor/wistia.py create mode 100644 yt_dlp/extractor/wordpress.py create mode 100644 yt_dlp/extractor/worldstarhiphop.py create mode 100644 yt_dlp/extractor/wppilot.py create mode 100644 yt_dlp/extractor/wrestleuniverse.py create mode 100644 yt_dlp/extractor/wsj.py create mode 100644 yt_dlp/extractor/wwe.py create mode 100644 yt_dlp/extractor/wykop.py create mode 100644 yt_dlp/extractor/xanimu.py create mode 100644 yt_dlp/extractor/xboxclips.py create mode 100644 yt_dlp/extractor/xfileshare.py create mode 100644 yt_dlp/extractor/xhamster.py create mode 100644 yt_dlp/extractor/ximalaya.py create mode 100644 yt_dlp/extractor/xinpianchang.py create mode 100644 yt_dlp/extractor/xminus.py create mode 100644 yt_dlp/extractor/xnxx.py create mode 100644 yt_dlp/extractor/xstream.py create mode 100644 yt_dlp/extractor/xvideos.py create mode 100644 yt_dlp/extractor/xxxymovies.py create mode 100644 yt_dlp/extractor/yahoo.py create mode 100644 yt_dlp/extractor/yandexdisk.py create mode 100644 yt_dlp/extractor/yandexmusic.py create mode 100644 yt_dlp/extractor/yandexvideo.py create mode 100644 yt_dlp/extractor/yapfiles.py create mode 100644 yt_dlp/extractor/yappy.py create mode 100644 yt_dlp/extractor/yle_areena.py create mode 100644 yt_dlp/extractor/youjizz.py create mode 100644 yt_dlp/extractor/youku.py create mode 100644 yt_dlp/extractor/younow.py create mode 100644 yt_dlp/extractor/youporn.py create mode 100644 yt_dlp/extractor/yourporn.py create mode 100644 yt_dlp/extractor/yourupload.py create mode 100644 yt_dlp/extractor/youtube.py create mode 100644 yt_dlp/extractor/zaiko.py create mode 100644 yt_dlp/extractor/zapiks.py create mode 100644 yt_dlp/extractor/zattoo.py create mode 100644 yt_dlp/extractor/zdf.py 
 create mode 100644 yt_dlp/extractor/zee5.py
 create mode 100644 yt_dlp/extractor/zeenews.py
 create mode 100644 yt_dlp/extractor/zenporn.py
 create mode 100644 yt_dlp/extractor/zetland.py
 create mode 100644 yt_dlp/extractor/zhihu.py
 create mode 100644 yt_dlp/extractor/zingmp3.py
 create mode 100644 yt_dlp/extractor/zoom.py
 create mode 100644 yt_dlp/extractor/zype.py
 create mode 100644 yt_dlp/jsinterp.py
 create mode 100644 yt_dlp/minicurses.py
 create mode 100644 yt_dlp/networking/__init__.py
 create mode 100644 yt_dlp/networking/_helper.py
 create mode 100644 yt_dlp/networking/_requests.py
 create mode 100644 yt_dlp/networking/_urllib.py
 create mode 100644 yt_dlp/networking/_websockets.py
 create mode 100644 yt_dlp/networking/common.py
 create mode 100644 yt_dlp/networking/exceptions.py
 create mode 100644 yt_dlp/networking/websocket.py
 create mode 100644 yt_dlp/options.py
 create mode 100644 yt_dlp/plugins.py
 create mode 100644 yt_dlp/postprocessor/__init__.py
 create mode 100644 yt_dlp/postprocessor/common.py
 create mode 100644 yt_dlp/postprocessor/embedthumbnail.py
 create mode 100644 yt_dlp/postprocessor/exec.py
 create mode 100644 yt_dlp/postprocessor/ffmpeg.py
 create mode 100644 yt_dlp/postprocessor/metadataparser.py
 create mode 100644 yt_dlp/postprocessor/modify_chapters.py
 create mode 100644 yt_dlp/postprocessor/movefilesafterdownload.py
 create mode 100644 yt_dlp/postprocessor/sponskrub.py
 create mode 100644 yt_dlp/postprocessor/sponsorblock.py
 create mode 100644 yt_dlp/postprocessor/xattrpp.py
 create mode 100644 yt_dlp/socks.py
 create mode 100644 yt_dlp/update.py
 create mode 100644 yt_dlp/utils/__init__.py
 create mode 100644 yt_dlp/utils/_deprecated.py
 create mode 100644 yt_dlp/utils/_legacy.py
 create mode 100644 yt_dlp/utils/_utils.py
 create mode 100644 yt_dlp/utils/networking.py
 create mode 100644 yt_dlp/utils/progress.py
 create mode 100644 yt_dlp/utils/traversal.py
 create mode 100644 yt_dlp/version.py
 create mode 100644 yt_dlp/webvtt.py

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..40c19fa
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,8 @@
+root = true
+
+[**.py]
+charset = utf-8
+indent_size = 4
+indent_style = space
+trim_trailing_whitespace = true
+insert_final_newline = true
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..f3e1df5
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,6 @@
+* text=auto
+
+Makefile* text whitespace=-tab-in-indent
+*.sh text eol=lf
+*.md diff=markdown
+*.py diff=python
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..04de087
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,13 @@
+# These are supported funding model platforms
+
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+
+custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators']
diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml
new file mode 100644
index 0000000..5df13ad
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml
@@ -0,0 +1,79 @@
+name: Broken site support
+description: Report issue with yt-dlp on a supported site
+labels: [triage, site-bug]
+body:
+  - type: checkboxes
+    attributes:
+      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+      description: Fill all fields even if you think it is irrelevant for the issue
+      options:
+        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+          required: true
+  - type: checkboxes
+    id: checklist
+    attributes:
+      label: Checklist
+      description: |
+        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+      options:
+        - label: I'm reporting that yt-dlp is broken on a **supported** site
+          required: true
+        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+          required: true
+        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+          required: true
+        - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+          required: true
+        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+          required: true
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+          required: true
+        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required
+  - type: input
+    id: region
+    attributes:
+      label: Region
+      description: Enter the country/region that the site is accessible from
+      placeholder: India
+  - type: textarea
+    id: description
+    attributes:
+      label: Provide a description that is worded well enough to be understood
+      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+    validations:
+      required: true
+  - type: checkboxes
+    id: verbose
+    attributes:
+      label: Provide verbose output that clearly demonstrates the problem
+      options:
+        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `)
+          required: true
+        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+          required: false
+        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+          required: true
+  - type: textarea
+    id: log
+    attributes:
+      label: Complete Verbose Output
+      description: |
+        It should start like this:
+      placeholder: |
+        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+        [debug] Proxy map: {}
+        [debug] Request Handlers: urllib, requests
+        [debug] Loaded 1893 extractors
+        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+
+      render: shell
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
new file mode 100644
index 0000000..644c87a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
@@ -0,0 +1,91 @@
+name: Site support request
+description: Request support for a new site
+labels: [triage, site-request]
+body:
+  - type: checkboxes
+    attributes:
+      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+      description: Fill all fields even if you think it is irrelevant for the issue
+      options:
+        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+          required: true
+  - type: checkboxes
+    id: checklist
+    attributes:
+      label: Checklist
+      description: |
+        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+      options:
+        - label: I'm reporting a new site support request
+          required: true
+        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+          required: true
+        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+          required: true
+        - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
+          required: true
+        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+          required: true
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+          required: true
+        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required
+  - type: input
+    id: region
+    attributes:
+      label: Region
+      description: Enter the country/region that the site is accessible from
+      placeholder: India
+  - type: textarea
+    id: example-urls
+    attributes:
+      label: Example URLs
+      description: |
+        Provide all kinds of example URLs for which support should be added
+      placeholder: |
+        - Single video: https://www.youtube.com/watch?v=BaW_jenozKc
+        - Single video: https://youtu.be/BaW_jenozKc
+        - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
+    validations:
+      required: true
+  - type: textarea
+    id: description
+    attributes:
+      label: Provide a description that is worded well enough to be understood
+      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+    validations:
+      required: true
+  - type: checkboxes
+    id: verbose
+    attributes:
+      label: Provide verbose output that clearly demonstrates the problem
+      options:
+        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `)
+          required: true
+        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+          required: false
+        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+          required: true
+  - type: textarea
+    id: log
+    attributes:
+      label: Complete Verbose Output
+      description: |
+        It should start like this:
+      placeholder: |
+        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+        [debug] Proxy map: {}
+        [debug] Request Handlers: urllib, requests
+        [debug] Loaded 1893 extractors
+        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+
+      render: shell
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml
new file mode 100644
index 0000000..59d0474
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml
@@ -0,0 +1,87 @@
+name: Site feature request
+description: Request a new functionality for a supported site
+labels: [triage, site-enhancement]
+body:
+  - type: checkboxes
+    attributes:
+      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+      description: Fill all fields even if you think it is irrelevant for the issue
+      options:
+        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+          required: true
+  - type: checkboxes
+    id: checklist
+    attributes:
+      label: Checklist
+      description: |
+        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+      options:
+        - label: I'm requesting a site-specific feature
+          required: true
+        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+          required: true
+        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+          required: true
+        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+          required: true
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+          required: true
+        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required
+  - type: input
+    id: region
+    attributes:
+      label: Region
+      description: Enter the country/region that the site is accessible from
+      placeholder: India
+  - type: textarea
+    id: example-urls
+    attributes:
+      label: Example URLs
+      description: |
+        Example URLs that can be used to demonstrate the requested feature
+      placeholder: |
+        https://www.youtube.com/watch?v=BaW_jenozKc
+    validations:
+      required: true
+  - type: textarea
+    id: description
+    attributes:
+      label: Provide a description that is worded well enough to be understood
+      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+    validations:
+      required: true
+  - type: checkboxes
+    id: verbose
+    attributes:
+      label: Provide verbose output that clearly demonstrates the problem
+      options:
+        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `)
+          required: true
+        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+          required: false
+        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+          required: true
+  - type: textarea
+    id: log
+    attributes:
+      label: Complete Verbose Output
+      description: |
+        It should start like this:
+      placeholder: |
+        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+        [debug] Proxy map: {}
+        [debug] Request Handlers: urllib, requests
+        [debug] Loaded 1893 extractors
+        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+
+      render: shell
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml
new file mode 100644
index 0000000..e207396
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml
@@ -0,0 +1,72 @@
+name: Core bug report
+description: Report a bug unrelated to any particular site or extractor
+labels: [triage, bug]
+body:
+  - type: checkboxes
+    attributes:
+      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+      description: Fill all fields even if you think it is irrelevant for the issue
+      options:
+        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+          required: true
+  - type: checkboxes
+    id: checklist
+    attributes:
+      label: Checklist
+      description: |
+        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+      options:
+        - label: I'm reporting a bug unrelated to a specific site
+          required: true
+        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+          required: true
+        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
+          required: true
+        - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
+          required: true
+        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+          required: true
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+          required: true
+  - type: textarea
+    id: description
+    attributes:
+      label: Provide a description that is worded well enough to be understood
+      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+    validations:
+      required: true
+  - type: checkboxes
+    id: verbose
+    attributes:
+      label: Provide verbose output that clearly demonstrates the problem
+      options:
+        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `)
+          required: true
+        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+          required: false
+        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+          required: true
+  - type: textarea
+    id: log
+    attributes:
+      label: Complete Verbose Output
+      description: |
+        It should start like this:
+      placeholder: |
+        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
+        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
+        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
+        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
+        [debug] Proxy map: {}
+        [debug] Request Handlers: urllib, requests
+        [debug] Loaded 1893 extractors
+        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
+        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
+        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
+
+      render: shell
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml
new file mode 100644
index 0000000..e06db9c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml
@@ -0,0 +1,66 @@
+name: Feature request
+description: Request a new functionality unrelated to any particular site or extractor
+labels: [triage, enhancement]
+body:
+  - type: checkboxes
+    attributes:
+      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
+      description: Fill all fields even if you think it is irrelevant for the issue
+      options:
+        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
+          required: true
+  - type: checkboxes
+    id: checklist
+    attributes:
+      label: Checklist
+      description: |
+        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+      options:
+        - label: I'm requesting a feature unrelated to a specific site
+          required: true
+        - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
+          required: true
+        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
+          required: true
+        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+          required: true
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+          required: true
+  - type: textarea
+    id: description
+    attributes:
+      label: Provide a description that is worded well enough to be understood
+      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
+      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
+    validations:
+      required: true
+  - type: checkboxes
+    id: verbose
+    attributes:
+      label: Provide verbose output that clearly demonstrates the problem
+      options:
+        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `)
+        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+          required: false
+        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
+  - type: textarea
+    id: log
+    attributes:
+      label: Complete Verbose Output
+      description: |
+        It should start like this:
+      placeholder: |
+        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
+        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
+        [debug] yt-dlp version nightly@...
from yt-dlp/yt-dlp [b634ba742] (win_exe) + [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 + [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 + [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] Proxy map: {} + [debug] Request Handlers: urllib, requests + [debug] Loaded 1893 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) + [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc + + render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml new file mode 100644 index 0000000..571223a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -0,0 +1,72 @@ +name: Ask question +description: Ask yt-dlp related question +labels: [question] +body: + - type: checkboxes + attributes: + label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE + description: Fill all fields even if you think it is irrelevant for the issue + options: + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field + required: true + - type: markdown + attributes: + value: | + ### Make sure you are **only** asking a question and not reporting a bug or requesting a feature. + If your question contains "isn't working" or "can you add", this is most likely the wrong template. + If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**! + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm asking a question and **not** reporting a bug or requesting a feature + required: true + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) + required: true + - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) + required: true + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: question + attributes: + label: Please make sure the question is worded well enough to be understood + description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) + placeholder: Provide any additional information and as much context and examples as possible + validations: + required: true + - type: checkboxes + id: verbose + attributes: + label: Provide verbose output that clearly demonstrates the problem + options: + - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false + - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below + - type: textarea + id: log + attributes: + label: Complete Verbose Output + description: | + It should start like this: + placeholder: | + [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 + [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) + [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 + [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 + [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] Proxy map: {} + [debug] Request Handlers: urllib, requests + [debug] Loaded 1893 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) + [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc + + render: shell diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..9cdffa4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Get help from the community on Discord + url: https://discord.gg/H5MNcFW63r + about: Join the yt-dlp Discord for community-powered support! 
+ - name: Matrix Bridge to the Discord server + url: https://matrix.to/#/#yt-dlp:matrix.org + about: For those who do not want to use Discord diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml new file mode 100644 index 0000000..bff28ae --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -0,0 +1,40 @@ +name: Broken site support +description: Report issue with yt-dlp on a supported site +labels: [triage, site-bug] +body: + %(no_skip)s + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting that yt-dlp is broken on a **supported** site + required: true + - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) + required: true + - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: Enter the country/region that the site is accessible from + placeholder: India + - type: textarea + id: description + attributes: + label: Provide a description that is worded well enough to be understood + description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible + validations: + required: true + %(verbose)s diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml new file mode 100644 index 0000000..2bffe73 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -0,0 +1,52 @@ +name: Site support request +description: Request support for a new site +labels: [triage, site-request] +body: + %(no_skip)s + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a new site support request + required: true + - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) + required: true + - label: I've checked that all provided URLs are playable in a 
browser with the same IP and same login details + required: true + - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge + required: true + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required + - type: input + id: region + attributes: + label: Region + description: Enter the country/region that the site is accessible from + placeholder: India + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Provide all kinds of example URLs for which support should be added + placeholder: | + - Single video: https://www.youtube.com/watch?v=BaW_jenozKc + - Single video: https://youtu.be/BaW_jenozKc + - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + validations: + required: true + - type: textarea + id: description + attributes: + label: Provide a description that is worded well enough to be understood + description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible + validations: + required: true + %(verbose)s diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml new file mode 100644 index 0000000..6c31279 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -0,0 +1,48 @@ +name: Site feature request +description: Request a new functionality for a supported site +labels: [triage, site-enhancement] +body: + %(no_skip)s + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm requesting a site-specific feature + required: true + - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) + required: true + - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details + required: true + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: Enter the country/region that the site is accessible from + placeholder: India + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Example URLs that can be used to demonstrate the requested feature + placeholder: | + https://www.youtube.com/watch?v=BaW_jenozKc + validations: + required: true + - type: textarea + id: description + attributes: + label: Provide a description that is worded well enough to be understood + description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible + validations: + required: true + %(verbose)s diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml new file mode 100644 index 0000000..5f357d9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -0,0 +1,33 @@ +name: Core bug report +description: Report a bug unrelated to any particular site or extractor +labels: [triage, bug] +body: + %(no_skip)s + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a bug unrelated to a specific site + required: true + - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) + required: true + - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Provide a description that is worded well enough to be understood + description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible + validations: + required: true + %(verbose)s diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml new file mode 100644 index 0000000..99107ff --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml @@ -0,0 +1,31 @@ +name: Feature request +description: Request a new functionality unrelated to any particular site or extractor +labels: [triage, enhancement] +body: + %(no_skip)s + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm requesting a feature unrelated to a specific site + required: true + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) + required: true + - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) + required: true + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Provide a description that is worded well enough to be understood + description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible + validations: + required: true + %(verbose_optional)s diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml new file mode 100644 index 0000000..bd74210 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml @@ -0,0 +1,37 @@ +name: Ask question +description: Ask yt-dlp related question +labels: [question] +body: + %(no_skip)s + - type: markdown + attributes: + value: | + ### Make sure you are **only** asking a question and not reporting a bug or requesting a feature. + If your question contains "isn't working" or "can you add", this is most likely the wrong template. + If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**! 
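+  # NOTE: %(no_skip)s above and %(verbose_optional)s below (like %(verbose)s in the other
+  # _tmpl files) are printf-style placeholders; they appear to be expanded into the full
+  # checkbox/textarea sections by devscripts/make_issue_template.py when the concrete
+  # templates under .github/ISSUE_TEMPLATE/ are generated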
+ - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm asking a question and **not** reporting a bug or requesting a feature + required: true + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) + required: true + - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) + required: true + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: question + attributes: + label: Please make sure the question is worded well enough to be understood + description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient) + placeholder: Provide any additional information and as much context and examples as possible + validations: + required: true + %(verbose_optional)s diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..c4d3e81 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,43 @@ +**IMPORTANT**: PRs without the template will be CLOSED + +### Description of your *pull request* and other information + + + +ADD DESCRIPTION HERE + +Fixes # + + +
+<details open><summary>Template</summary>
+
+
+### Before submitting a *pull request* make sure you have:
+- [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions)
+- [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
+- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions)
+
+### In order to be accepted and merged into yt-dlp, each piece of code must be in the public domain or released under the [Unlicense](http://unlicense.org/). Check all of the following options that apply:
+- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
+- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence)
+
+### What is the purpose of your *pull request*?
+- [ ] Fix or improvement to an extractor (Make sure to add/update tests)
+- [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy))
+- [ ] Core bug fix/improvement
+- [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes))
+
+</details>
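The issue templates above repeatedly direct API users to pass `'verbose': True` to `YoutubeDL` rather than running the CLI with `-vU`. As a rough sketch of what that looks like when embedding yt-dlp (assuming the package is importable as `yt_dlp`; the URL is only the templates' own example):

```python
# Minimal sketch: the API counterpart of running the yt-dlp CLI with -v,
# producing the same [debug] lines the templates ask reporters to paste.
from yt_dlp import YoutubeDL

params = {
    'verbose': True,  # corresponds to the templates' "If using API" checkbox
}

with YoutubeDL(params) as ydl:
    # download=False extracts metadata only; enough to capture the verbose head
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info.get('title'))
```

The `U` half of `-vU` is a self-update check with no direct `params` equivalent, which is presumably why the templates say "instead".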
diff --git a/.github/banner.svg b/.github/banner.svg new file mode 100644 index 0000000..35dc93e --- /dev/null +++ b/.github/banner.svg @@ -0,0 +1,31 @@ + + + + + + + + + diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..4bed5af --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,487 @@ +name: Build Artifacts +on: + workflow_call: + inputs: + version: + required: true + type: string + channel: + required: false + default: stable + type: string + unix: + default: true + type: boolean + linux_arm: + default: true + type: boolean + macos: + default: true + type: boolean + macos_legacy: + default: true + type: boolean + windows: + default: true + type: boolean + windows32: + default: true + type: boolean + meta_files: + default: true + type: boolean + origin: + required: false + default: '' + type: string + secrets: + GPG_SIGNING_KEY: + required: false + + workflow_dispatch: + inputs: + version: + description: | + VERSION: yyyy.mm.dd[.rev] or rev + required: true + type: string + channel: + description: | + SOURCE of this build's updates: stable/nightly/master/ + required: true + default: stable + type: string + unix: + description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip + default: true + type: boolean + linux_arm: + description: yt-dlp_linux_aarch64, yt-dlp_linux_armv7l + default: true + type: boolean + macos: + description: yt-dlp_macos, yt-dlp_macos.zip + default: true + type: boolean + macos_legacy: + description: yt-dlp_macos_legacy + default: true + type: boolean + windows: + description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip + default: true + type: boolean + windows32: + description: yt-dlp_x86.exe + default: true + type: boolean + meta_files: + description: SHA2-256SUMS, SHA2-512SUMS, _update_spec + default: true + type: boolean + origin: + description: Origin + required: false + default: 'current repo' + type: choice + options: + - 'current repo' + +permissions: + contents: read + +jobs: + process: + runs-on: ubuntu-latest + outputs: + origin: ${{ steps.process_origin.outputs.origin }} + steps: + - name: Process origin + id: process_origin + run: | + echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT" + + unix: + needs: process + if: inputs.unix + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-variant: Mambaforge + use-mamba: true + channels: conda-forge + auto-update-conda: true + activate-environment: "" + auto-activate-base: false + - name: Install Requirements + run: | + sudo apt -y install zip pandoc man sed + cat > ./requirements.txt << EOF + python=3.10.* + brotli-python + EOF + python devscripts/install_deps.py --print \ + --exclude brotli --exclude brotlicffi \ + --include secretstorage --include pyinstaller >> ./requirements.txt + mamba create -n build --file ./requirements.txt + + - name: Prepare + run: | + python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python devscripts/make_lazy_extractors.py + - name: Build Unix platform-independent binary + run: | + make all tar + - name: Build Unix standalone binary + shell: bash -l {0} + run: | + unset LD_LIBRARY_PATH # Harmful; set by setup-python + conda activate build + python -m bundle.pyinstaller --onedir + (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip 
.) + python -m bundle.pyinstaller + mv ./dist/yt-dlp_linux ./yt-dlp_linux + mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip + + - name: Verify --update-to + if: vars.UPDATE_TO_VERIFICATION + run: | + binaries=("yt-dlp" "yt-dlp_linux") + for binary in "${binaries[@]}"; do + chmod +x ./${binary} + cp ./${binary} ./${binary}_downgraded + version="$(./${binary} --version)" + ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04 + downgraded_version="$(./${binary}_downgraded --version)" + [[ "$version" != "$downgraded_version" ]] + done + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-bin-${{ github.job }} + path: | + yt-dlp + yt-dlp.tar.gz + yt-dlp_linux + yt-dlp_linux.zip + compression-level: 0 + + linux_arm: + needs: process + if: inputs.linux_arm + permissions: + contents: read + packages: write # for creating cache + runs-on: ubuntu-latest + strategy: + matrix: + architecture: + - armv7 + - aarch64 + + steps: + - uses: actions/checkout@v4 + with: + path: ./repo + - name: Virtualized Install, Prepare & Build + uses: yt-dlp/run-on-arch-action@v2 + with: + # Ref: https://github.com/uraimo/run-on-arch-action/issues/55 + env: | + GITHUB_WORKFLOW: build + githubToken: ${{ github.token }} # To cache image + arch: ${{ matrix.architecture }} + distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS + dockerRunArgs: --volume "${PWD}/repo:/repo" + install: | # Installing Python 3.10 from the Deadsnakes repo raises errors + apt update + apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip + python3.8 -m pip install -U pip setuptools wheel + # Cannot access any files from the repo directory at this stage + python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi + + run: | + cd repo + python3.8 devscripts/install_deps.py -o --include build + python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date + python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python3.8 devscripts/make_lazy_extractors.py + python3.8 -m bundle.pyinstaller + + if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then + arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" + chmod +x ./dist/yt-dlp_linux_${arch} + cp ./dist/yt-dlp_linux_${arch} ./dist/yt-dlp_linux_${arch}_downgraded + version="$(./dist/yt-dlp_linux_${arch} --version)" + ./dist/yt-dlp_linux_${arch}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04 + downgraded_version="$(./dist/yt-dlp_linux_${arch}_downgraded --version)" + [[ "$version" != "$downgraded_version" ]] + fi + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-bin-linux_${{ matrix.architecture }} + path: | # run-on-arch-action designates armv7l as armv7 + repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} + compression-level: 0 + + macos: + needs: process + if: inputs.macos + runs-on: macos-11 + + steps: + - uses: actions/checkout@v4 + # NB: Building universal2 does not work with python from actions/setup-python + - name: Install Requirements + run: | + brew install coreutils + python3 devscripts/install_deps.py --user -o --include build + python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt + # We need to ignore wheels otherwise we break universal2 builds + 
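# (prebuilt wheels are usually single-arch; building dependencies from source with
+          #  --no-binary :all: appears to be what lets PyInstaller link a fat x86_64+arm64 binary)
+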
python3 -m pip install -U --user --no-binary :all: -r requirements.txt + + - name: Prepare + run: | + python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python3 devscripts/make_lazy_extractors.py + - name: Build + run: | + python3 -m bundle.pyinstaller --target-architecture universal2 --onedir + (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) + python3 -m bundle.pyinstaller --target-architecture universal2 + + - name: Verify --update-to + if: vars.UPDATE_TO_VERIFICATION + run: | + chmod +x ./dist/yt-dlp_macos + cp ./dist/yt-dlp_macos ./dist/yt-dlp_macos_downgraded + version="$(./dist/yt-dlp_macos --version)" + ./dist/yt-dlp_macos_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04 + downgraded_version="$(./dist/yt-dlp_macos_downgraded --version)" + [[ "$version" != "$downgraded_version" ]] + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-bin-${{ github.job }} + path: | + dist/yt-dlp_macos + dist/yt-dlp_macos.zip + compression-level: 0 + + macos_legacy: + needs: process + if: inputs.macos_legacy + runs-on: macos-latest + + steps: + - uses: actions/checkout@v4 + - name: Install Python + # We need the official Python, because the GA ones only support newer macOS versions + env: + PYTHON_VERSION: 3.10.5 + MACOSX_DEPLOYMENT_TARGET: 10.9 # Used up by the Python build tools + run: | + # Hack to get the latest patch version. Uncomment if needed + #brew install python@3.10 + #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 ) + curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg" + sudo installer -pkg python.pkg -target / + python3 --version + - name: Install Requirements + run: | + brew install coreutils + python3 devscripts/install_deps.py --user -o --include build + python3 devscripts/install_deps.py --user --include pyinstaller + + - name: Prepare + run: | + python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python3 devscripts/make_lazy_extractors.py + - name: Build + run: | + python3 -m bundle.pyinstaller + mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy + + - name: Verify --update-to + if: vars.UPDATE_TO_VERIFICATION + run: | + chmod +x ./dist/yt-dlp_macos_legacy + cp ./dist/yt-dlp_macos_legacy ./dist/yt-dlp_macos_legacy_downgraded + version="$(./dist/yt-dlp_macos_legacy --version)" + ./dist/yt-dlp_macos_legacy_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04 + downgraded_version="$(./dist/yt-dlp_macos_legacy_downgraded --version)" + [[ "$version" != "$downgraded_version" ]] + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-bin-${{ github.job }} + path: | + dist/yt-dlp_macos_legacy + compression-level: 0 + + windows: + needs: process + if: inputs.windows + runs-on: windows-latest + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: # 3.8 is used for Win7 support + python-version: "3.8" + - name: Install Requirements + run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds + python devscripts/install_deps.py -o --include build + python devscripts/install_deps.py --include py2exe + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" + + - name: Prepare + run: | + python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ 
needs.process.outputs.origin }}" "${{ inputs.version }}" + python devscripts/make_lazy_extractors.py + - name: Build + run: | + python -m bundle.py2exe + Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe + python -m bundle.pyinstaller + python -m bundle.pyinstaller --onedir + Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip + + - name: Verify --update-to + if: vars.UPDATE_TO_VERIFICATION + run: | + foreach ($name in @("yt-dlp","yt-dlp_min")) { + Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" + $version = & "./dist/${name}.exe" --version + & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04 + $downgraded_version = & "./dist/${name}_downgraded.exe" --version + if ($version -eq $downgraded_version) { + exit 1 + } + } + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-bin-${{ github.job }} + path: | + dist/yt-dlp.exe + dist/yt-dlp_min.exe + dist/yt-dlp_win.zip + compression-level: 0 + + windows32: + needs: process + if: inputs.windows32 + runs-on: windows-latest + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.8" + architecture: "x86" + - name: Install Requirements + run: | + python devscripts/install_deps.py -o --include build + python devscripts/install_deps.py + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" + + - name: Prepare + run: | + python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python devscripts/make_lazy_extractors.py + - name: Build + run: | + python -m bundle.pyinstaller + + - name: Verify --update-to + if: vars.UPDATE_TO_VERIFICATION + run: | + foreach ($name in @("yt-dlp_x86")) { + Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" + $version = & "./dist/${name}.exe" --version + & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04 + $downgraded_version = & "./dist/${name}_downgraded.exe" --version + if ($version -eq $downgraded_version) { + exit 1 + } + } + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-bin-${{ github.job }} + path: | + dist/yt-dlp_x86.exe + compression-level: 0 + + meta_files: + if: inputs.meta_files && always() && !cancelled() + needs: + - process + - unix + - linux_arm + - macos + - macos_legacy + - windows + - windows32 + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + path: artifact + pattern: build-bin-* + merge-multiple: true + + - name: Make SHA2-SUMS files + run: | + cd ./artifact/ + sha256sum * > ../SHA2-256SUMS + sha512sum * > ../SHA2-512SUMS + + - name: Make Update spec + run: | + cat >> _update_spec << EOF + # This file is used for regulating self-update + lock 2022.08.18.36 .+ Python 3\.6 + lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 + lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 + lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 + lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 + lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 + lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) 
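+          # The entries above appear to be read by yt-dlp's self-updater as
+          #   lock   <max version> <regex matched against "variant system">
+          #   lockV2 <repo> <max version> <regex>
+          # i.e. installs matching <regex> will not self-update past <max version>
+          # (e.g. the first lock keeps Python 3.6 builds on 2022.08.18.36)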
+ EOF + + - name: Sign checksum files + env: + GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} + if: env.GPG_SIGNING_KEY != '' + run: | + gpg --batch --import <<< "${{ secrets.GPG_SIGNING_KEY }}" + for signfile in ./SHA*SUMS; do + gpg --batch --detach-sign "$signfile" + done + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: build-${{ github.job }} + path: | + _update_spec + SHA*SUMS* + compression-level: 0 + overwrite: true diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..170a6ac --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,65 @@ +name: "CodeQL" + +on: + push: + branches: [ 'master', 'gh-pages', 'release' ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ 'master' ] + schedule: + - cron: '59 11 * * 5' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Use only 'java' to analyze code written in Java, Kotlin or both + # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
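+    #   (that is, replace the commented "run" step below with your own build commands if the
+    #   project uses a compiled language; for a pure-Python project, Autobuild alone suffices)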
+
+    # - run: |
+    #   echo "Run, Build Application using script"
+    #   ./location_of_script_within_repo/buildscript.sh
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v2
+      with:
+        category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
new file mode 100644
index 0000000..ba86306
--- /dev/null
+++ b/.github/workflows/core.yml
@@ -0,0 +1,61 @@
+name: Core Tests
+on:
+  push:
+    paths:
+      - .github/**
+      - devscripts/**
+      - test/**
+      - yt_dlp/**.py
+      - '!yt_dlp/extractor/*.py'
+      - yt_dlp/extractor/__init__.py
+      - yt_dlp/extractor/common.py
+      - yt_dlp/extractor/extractors.py
+  pull_request:
+    paths:
+      - .github/**
+      - devscripts/**
+      - test/**
+      - yt_dlp/**.py
+      - '!yt_dlp/extractor/*.py'
+      - yt_dlp/extractor/__init__.py
+      - yt_dlp/extractor/common.py
+      - yt_dlp/extractor/extractors.py
+permissions:
+  contents: read
+
+concurrency:
+  group: core-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+  tests:
+    name: Core Tests
+    if: "!contains(github.event.head_commit.message, 'ci skip')"
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        # CPython 3.8 is in quick-test
+        python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10]
+        include:
+          # At least one of each of the CPython/PyPy tests must run on Windows
+          - os: windows-latest
+            python-version: '3.8'
+          - os: windows-latest
+            python-version: '3.12'
+          - os: windows-latest
+            python-version: pypy-3.9
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install test requirements
+        run: python3 ./devscripts/install_deps.py --include dev
+      - name: Run tests
+        continue-on-error: False
+        run: |
+          python3 -m yt_dlp -v || true  # Print debug head
+          python3 ./devscripts/run_tests.py core
diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml
new file mode 100644
index 0000000..7256804
--- /dev/null
+++ b/.github/workflows/download.yml
@@ -0,0 +1,48 @@
+name: Download Tests
+on: [push, pull_request]
+permissions:
+  contents: read
+
+jobs:
+  quick:
+    name: Quick Download Tests
+    if: "contains(github.event.head_commit.message, 'ci run dl')"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.9
+      - name: Install test requirements
+        run: python3 ./devscripts/install_deps.py --include dev
+      - name: Run tests
+        continue-on-error: true
+        run: python3 ./devscripts/run_tests.py download
+
+  full:
+    name: Full Download Tests
+    if: "contains(github.event.head_commit.message, 'ci run dl all')"
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: true
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10]
+        include:
+          # At least one of each of the CPython/PyPy tests must run on Windows
+          - os: windows-latest
+            python-version: '3.8'
+          - os: windows-latest
+            python-version: pypy-3.9
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install test requirements
+        run: python3 ./devscripts/install_deps.py --include dev
+      - name: Run tests
+        continue-on-error: true
+        run: python3 ./devscripts/run_tests.py download
diff --git a/.github/workflows/quick-test.yml
b/.github/workflows/quick-test.yml new file mode 100644 index 0000000..3114e7b --- /dev/null +++ b/.github/workflows/quick-test.yml @@ -0,0 +1,35 @@ +name: Quick Test +on: [push, pull_request] +permissions: + contents: read + +jobs: + tests: + name: Core Test + if: "!contains(github.event.head_commit.message, 'ci skip all')" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.8 + uses: actions/setup-python@v5 + with: + python-version: '3.8' + - name: Install test requirements + run: python3 ./devscripts/install_deps.py --include dev + - name: Run tests + run: | + python3 -m yt_dlp -v || true + python3 ./devscripts/run_tests.py core + flake8: + name: Linter + if: "!contains(github.event.head_commit.message, 'ci skip all')" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Install flake8 + run: python3 ./devscripts/install_deps.py -o --include dev + - name: Make lazy extractors + run: python3 ./devscripts/make_lazy_extractors.py + - name: Run flake8 + run: flake8 . diff --git a/.github/workflows/release-master.yml b/.github/workflows/release-master.yml new file mode 100644 index 0000000..a845475 --- /dev/null +++ b/.github/workflows/release-master.yml @@ -0,0 +1,29 @@ +name: Release (master) +on: + push: + branches: + - master + paths: + - "yt_dlp/**.py" + - "!yt_dlp/version.py" + - "bundle/*.py" + - "pyproject.toml" + - "Makefile" + - ".github/workflows/build.yml" +concurrency: + group: release-master +permissions: + contents: read + +jobs: + release: + if: vars.BUILD_MASTER != '' + uses: ./.github/workflows/release.yml + with: + prerelease: true + source: master + permissions: + contents: write + packages: write + id-token: write # mandatory for trusted publishing + secrets: inherit diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml new file mode 100644 index 0000000..f459a3a --- /dev/null +++ b/.github/workflows/release-nightly.yml @@ -0,0 +1,42 @@ +name: Release (nightly) +on: + schedule: + - cron: '23 23 * * *' +permissions: + contents: read + +jobs: + check_nightly: + if: vars.BUILD_NIGHTLY != '' + runs-on: ubuntu-latest + outputs: + commit: ${{ steps.check_for_new_commits.outputs.commit }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Check for new commits + id: check_for_new_commits + run: | + relevant_files=( + "yt_dlp/*.py" + ':!yt_dlp/version.py' + "bundle/*.py" + "pyproject.toml" + "Makefile" + ".github/workflows/build.yml" + ) + echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT" + + release: + needs: [check_nightly] + if: ${{ needs.check_nightly.outputs.commit }} + uses: ./.github/workflows/release.yml + with: + prerelease: true + source: nightly + permissions: + contents: write + packages: write + id-token: write # mandatory for trusted publishing + secrets: inherit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..fd99cec --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,387 @@ +name: Release +on: + workflow_call: + inputs: + prerelease: + required: false + default: true + type: boolean + source: + required: false + default: '' + type: string + target: + required: false + default: '' + type: string + version: + required: false + default: '' + type: string + workflow_dispatch: + inputs: + source: + description: | + SOURCE of this release's updates: + channel, repo, tag, or 
channel/repo@tag + (default: ) + required: false + default: '' + type: string + target: + description: | + TARGET to publish this release to: + channel, tag, or channel@tag + (default: if writable else [@source_tag]) + required: false + default: '' + type: string + version: + description: | + VERSION: yyyy.mm.dd[.rev] or rev + (default: auto-generated) + required: false + default: '' + type: string + prerelease: + description: Pre-release + default: false + type: boolean + +permissions: + contents: read + +jobs: + prepare: + permissions: + contents: write + runs-on: ubuntu-latest + outputs: + channel: ${{ steps.setup_variables.outputs.channel }} + version: ${{ steps.setup_variables.outputs.version }} + target_repo: ${{ steps.setup_variables.outputs.target_repo }} + target_repo_token: ${{ steps.setup_variables.outputs.target_repo_token }} + target_tag: ${{ steps.setup_variables.outputs.target_tag }} + pypi_project: ${{ steps.setup_variables.outputs.pypi_project }} + pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }} + head_sha: ${{ steps.get_target.outputs.head_sha }} + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Process inputs + id: process_inputs + run: | + cat << EOF + ::group::Inputs + prerelease=${{ inputs.prerelease }} + source=${{ inputs.source }} + target=${{ inputs.target }} + version=${{ inputs.version }} + ::endgroup:: + EOF + IFS='@' read -r source_repo source_tag <<<"${{ inputs.source }}" + IFS='@' read -r target_repo target_tag <<<"${{ inputs.target }}" + cat << EOF >> "$GITHUB_OUTPUT" + source_repo=${source_repo} + source_tag=${source_tag} + target_repo=${target_repo} + target_tag=${target_tag} + EOF + + - name: Setup variables + id: setup_variables + env: + source_repo: ${{ steps.process_inputs.outputs.source_repo }} + source_tag: ${{ steps.process_inputs.outputs.source_tag }} + target_repo: ${{ steps.process_inputs.outputs.target_repo }} + target_tag: ${{ steps.process_inputs.outputs.target_tag }} + run: | + # unholy bash monstrosity (sincere apologies) + fallback_token () { + if ${{ !secrets.ARCHIVE_REPO_TOKEN }}; then + echo "::error::Repository access secret ${target_repo_token^^} not found" + exit 1 + fi + target_repo_token=ARCHIVE_REPO_TOKEN + return 0 + } + + source_is_channel=0 + [[ "${source_repo}" == 'stable' ]] && source_repo='yt-dlp/yt-dlp' + if [[ -z "${source_repo}" ]]; then + source_repo='${{ github.repository }}' + elif [[ '${{ vars[format('{0}_archive_repo', env.source_repo)] }}' ]]; then + source_is_channel=1 + source_channel='${{ vars[format('{0}_archive_repo', env.source_repo)] }}' + elif [[ -z "${source_tag}" && "${source_repo}" != */* ]]; then + source_tag="${source_repo}" + source_repo='${{ github.repository }}' + fi + resolved_source="${source_repo}" + if [[ "${source_tag}" ]]; then + resolved_source="${resolved_source}@${source_tag}" + elif [[ "${source_repo}" == 'yt-dlp/yt-dlp' ]]; then + resolved_source='stable' + fi + + revision="${{ (inputs.prerelease || !vars.PUSH_VERSION_COMMIT) && '$(date -u +"%H%M%S")' || '' }}" + version="$( + python devscripts/update-version.py \ + -c "${resolved_source}" -r "${{ github.repository }}" ${{ inputs.version || '$revision' }} | \ + grep -Po "version=\K\d+\.\d+\.\d+(\.\d+)?")" + + if [[ "${target_repo}" ]]; then + if [[ -z "${target_tag}" ]]; then + if [[ '${{ vars[format('{0}_archive_repo', env.target_repo)] }}' ]]; then + target_tag="${source_tag:-${version}}" + else + target_tag="${target_repo}" + 
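# (a target that does not name a configured archive repo is thus treated as a tag on the current repository)
+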
target_repo='${{ github.repository }}' + fi + fi + if [[ "${target_repo}" != '${{ github.repository}}' ]]; then + target_repo='${{ vars[format('{0}_archive_repo', env.target_repo)] }}' + target_repo_token='${{ env.target_repo }}_archive_repo_token' + ${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token + pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}' + pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}' + fi + else + target_tag="${source_tag:-${version}}" + if ((source_is_channel)); then + target_repo="${source_channel}" + target_repo_token='${{ env.source_repo }}_archive_repo_token' + ${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token + pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}' + pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}' + else + target_repo='${{ github.repository }}' + fi + fi + + if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then + pypi_project='${{ vars.PYPI_PROJECT }}' + fi + + echo "::group::Output variables" + cat << EOF | tee -a "$GITHUB_OUTPUT" + channel=${resolved_source} + version=${version} + target_repo=${target_repo} + target_repo_token=${target_repo_token} + target_tag=${target_tag} + pypi_project=${pypi_project} + pypi_suffix=${pypi_suffix} + EOF + echo "::endgroup::" + + - name: Update documentation + env: + version: ${{ steps.setup_variables.outputs.version }} + target_repo: ${{ steps.setup_variables.outputs.target_repo }} + if: | + !inputs.prerelease && env.target_repo == github.repository + run: | + make doc + sed '/### /Q' Changelog.md >> ./CHANGELOG + echo '### ${{ env.version }}' >> ./CHANGELOG + python ./devscripts/make_changelog.py -vv -c >> ./CHANGELOG + echo >> ./CHANGELOG + grep -Poz '(?s)### \d+\.\d+\.\d+.+' 'Changelog.md' | head -n -1 >> ./CHANGELOG + cat ./CHANGELOG > Changelog.md + + - name: Push to release + id: push_release + env: + version: ${{ steps.setup_variables.outputs.version }} + target_repo: ${{ steps.setup_variables.outputs.target_repo }} + if: | + !inputs.prerelease && env.target_repo == github.repository + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add -u + git commit -m "Release ${{ env.version }}" \ + -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" + git push origin --force ${{ github.event.ref }}:release + + - name: Get target commitish + id: get_target + run: | + echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + + - name: Update master + env: + target_repo: ${{ steps.setup_variables.outputs.target_repo }} + if: | + vars.PUSH_VERSION_COMMIT != '' && !inputs.prerelease && env.target_repo == github.repository + run: git push origin ${{ github.event.ref }} + + build: + needs: prepare + uses: ./.github/workflows/build.yml + with: + version: ${{ needs.prepare.outputs.version }} + channel: ${{ needs.prepare.outputs.channel }} + origin: ${{ needs.prepare.outputs.target_repo }} + permissions: + contents: read + packages: write # For package cache + secrets: + GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} + + publish_pypi: + needs: [prepare, build] + if: ${{ needs.prepare.outputs.pypi_project }} + runs-on: ubuntu-latest + permissions: + id-token: write # mandatory for trusted publishing + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 + with: 
+ python-version: "3.10" + + - name: Install Requirements + run: | + sudo apt -y install pandoc man + python devscripts/install_deps.py -o --include build + + - name: Prepare + env: + version: ${{ needs.prepare.outputs.version }} + suffix: ${{ needs.prepare.outputs.pypi_suffix }} + channel: ${{ needs.prepare.outputs.channel }} + target_repo: ${{ needs.prepare.outputs.target_repo }} + pypi_project: ${{ needs.prepare.outputs.pypi_project }} + run: | + python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}" + python devscripts/make_lazy_extractors.py + sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml + + - name: Build + run: | + rm -rf dist/* + make pypi-files + printf '%s\n\n' \ + 'Official repository: <https://github.com/yt-dlp/yt-dlp>' \ + '**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new + cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md + python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" + make clean-cache + python -m build --no-isolation . + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true + + publish: + needs: [prepare, build] + permissions: + contents: write + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/download-artifact@v4 + with: + path: artifact + pattern: build-* + merge-multiple: true + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Generate release notes + env: + head_sha: ${{ needs.prepare.outputs.head_sha }} + target_repo: ${{ needs.prepare.outputs.target_repo }} + target_tag: ${{ needs.prepare.outputs.target_tag }} + run: | + printf '%s' \ + '[![Installation](https://img.shields.io/badge/-Which%20file%20to%20download%3F-white.svg?style=for-the-badge)]' \ + '(https://github.com/${{ github.repository }}#installation "Installation instructions") ' \ + '[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)]' \ + '(https://discord.gg/H5MNcFW63r "Discord") ' \ + '[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)]' \ + '(https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators "Donate") ' \ + '[![Documentation](https://img.shields.io/badge/-Docs-brightgreen.svg?style=for-the-badge&logo=GitBook&labelColor=555555)]' \ + '(https://github.com/${{ github.repository }}' \ + '${{ env.target_repo == github.repository && format('/tree/{0}', env.target_tag) || '' }}#readme "Documentation") ' \ + ${{ env.target_repo == 'yt-dlp/yt-dlp' && '\ + "[![Nightly](https://img.shields.io/badge/Nightly%20builds-purple.svg?style=for-the-badge)]" \ + "(https://github.com/yt-dlp/yt-dlp-nightly-builds/releases/latest \"Nightly builds\") " \ + "[![Master](https://img.shields.io/badge/Master%20builds-lightblue.svg?style=for-the-badge)]" \ + "(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES + printf '\n\n' >> ./RELEASE_NOTES + cat >> ./RELEASE_NOTES << EOF + #### A description of the various files is in the [README](https://github.com/${{ github.repository }}#release-files) + --- + $(python ./devscripts/make_changelog.py -vv --collapsible) + EOF + printf '%s\n\n' '**This is a pre-release build**' >>
./PRERELEASE_NOTES + cat ./RELEASE_NOTES >> ./PRERELEASE_NOTES + printf '%s\n\n' 'Generated from: https://github.com/${{ github.repository }}/commit/${{ env.head_sha }}' >> ./ARCHIVE_NOTES + cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES + + - name: Publish to archive repo + env: + GH_TOKEN: ${{ secrets[needs.prepare.outputs.target_repo_token] }} + GH_REPO: ${{ needs.prepare.outputs.target_repo }} + version: ${{ needs.prepare.outputs.version }} + channel: ${{ needs.prepare.outputs.channel }} + if: | + inputs.prerelease && env.GH_TOKEN != '' && env.GH_REPO != '' && env.GH_REPO != github.repository + run: | + title="${{ startswith(env.GH_REPO, 'yt-dlp/') && 'yt-dlp ' || '' }}${{ env.channel }}" + gh release create \ + --notes-file ARCHIVE_NOTES \ + --title "${title} ${{ env.version }}" \ + ${{ env.version }} \ + artifact/* + + - name: Prune old release + env: + GH_TOKEN: ${{ github.token }} + version: ${{ needs.prepare.outputs.version }} + target_repo: ${{ needs.prepare.outputs.target_repo }} + target_tag: ${{ needs.prepare.outputs.target_tag }} + if: | + env.target_repo == github.repository && env.target_tag != env.version + run: | + gh release delete --yes --cleanup-tag "${{ env.target_tag }}" || true + git tag --delete "${{ env.target_tag }}" || true + sleep 5 # Enough time to cover deletion race condition + + - name: Publish release + env: + GH_TOKEN: ${{ github.token }} + version: ${{ needs.prepare.outputs.version }} + target_repo: ${{ needs.prepare.outputs.target_repo }} + target_tag: ${{ needs.prepare.outputs.target_tag }} + head_sha: ${{ needs.prepare.outputs.head_sha }} + if: | + env.target_repo == github.repository + run: | + title="${{ github.repository == 'yt-dlp/yt-dlp' && 'yt-dlp ' || '' }}" + title+="${{ env.target_tag != env.version && format('{0} ', env.target_tag) || '' }}" + gh release create \ + --notes-file ${{ inputs.prerelease && 'PRERELEASE_NOTES' || 'RELEASE_NOTES' }} \ + --target ${{ env.head_sha }} \ + --title "${title}${{ env.version }}" \ + ${{ inputs.prerelease && '--prerelease' || '' }} \ + ${{ env.target_tag }} \ + artifact/* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..630c2e0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,128 @@ +# Config +*.conf +cookies +*cookies.txt +.netrc + +# Downloaded +*.annotations.xml +*.aria2 +*.description +*.dump +*.frag +*.frag.aria2 +*.frag.urls +*.info.json +*.live_chat.json +*.meta +*.part* +*.tmp +*.temp +*.unknown_video +*.ytdl +.cache/ + +*.3gp +*.ape +*.ass +*.avi +*.desktop +*.f4v +*.flac +*.flv +*.gif +*.jpeg +*.jpg +*.lrc +*.m4a +*.m4v +*.mhtml +*.mkv +*.mov +*.mp3 +*.mp4 +*.mpg +*.mpga +*.oga +*.ogg +*.opus +*.png +*.sbv +*.srt +*.ssa +*.swf +*.swp +*.tt +*.ttml +*.url +*.vtt +*.wav +*.webloc +*.webm +*.webp + +# Allow config/media files in testdata +!test/** + +# Python +*.pyc +*.pyo +.pytest_cache +wine-py2exe/ +py2exe.log +build/ +dist/ +zip/ +tmp/ +venv/ +.venv/ +completions/ + +# Misc +*~ +*.DS_Store +*.kate-swp +MANIFEST +test/local_parameters.json +.coverage +cover/ +secrets/ +updates_key.pem +*.egg-info +.tox +*.class +*.isorted +*.stackdump + +# Generated +AUTHORS +README.txt +.mailmap +*.1 +*.bash-completion +*.fish +*.tar.gz +*.zsh +*.spec +test/testdata/sigs/player-*.js + +# Binary +/youtube-dl +/youtube-dlc +/yt-dlp +yt-dlp.zip +*.exe + +# Text Editor / IDE +.idea +*.iml +.vscode +*.sublime-* +*.code-workspace + +# Lazy extractors +*/extractor/lazy_extractors.py + +# Plugins +ytdlp_plugins/ +yt-dlp-plugins diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 
0000000..c94ec55 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,731 @@
+# CONTRIBUTING TO YT-DLP
+
+- [OPENING AN ISSUE](#opening-an-issue)
+    - [Is the description of the issue itself sufficient?](#is-the-description-of-the-issue-itself-sufficient)
+    - [Are you using the latest version?](#are-you-using-the-latest-version)
+    - [Is the issue already documented?](#is-the-issue-already-documented)
+    - [Why are existing options not enough?](#why-are-existing-options-not-enough)
+    - [Have you read and understood the changes between youtube-dl and yt-dlp](#have-you-read-and-understood-the-changes-between-youtube-dl-and-yt-dlp)
+    - [Is there enough context in your bug report?](#is-there-enough-context-in-your-bug-report)
+    - [Does the issue involve one problem, and one problem only?](#does-the-issue-involve-one-problem-and-one-problem-only)
+    - [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature)
+    - [Is your question about yt-dlp?](#is-your-question-about-yt-dlp)
+    - [Are you willing to share account details if needed?](#are-you-willing-to-share-account-details-if-needed)
+    - [Is the website primarily used for piracy?](#is-the-website-primarily-used-for-piracy)
+- [DEVELOPER INSTRUCTIONS](#developer-instructions)
+    - [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes)
+    - [Adding support for a new site](#adding-support-for-a-new-site)
+    - [yt-dlp coding conventions](#yt-dlp-coding-conventions)
+        - [Mandatory and optional metafields](#mandatory-and-optional-metafields)
+        - [Provide fallbacks](#provide-fallbacks)
+        - [Regular expressions](#regular-expressions)
+        - [Long lines policy](#long-lines-policy)
+        - [Quotes](#quotes)
+        - [Inline values](#inline-values)
+        - [Collapse fallbacks](#collapse-fallbacks)
+        - [Trailing parentheses](#trailing-parentheses)
+        - [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions)
+    - [My pull request is labeled pending-fixes](#my-pull-request-is-labeled-pending-fixes)
+- [EMBEDDING YT-DLP](README.md#embedding-yt-dlp)
+
+
+
+# OPENING AN ISSUE
+
+Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://github.com/yt-dlp/yt-dlp/issues). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in our [Discord server](https://discord.gg/H5MNcFW63r).
+
+**Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** the `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+```
+$ yt-dlp -vU
+[debug] Command-line config: ['-v', 'demo.com']
+[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8
+[debug] yt-dlp version 2021.09.25 (zip)
+[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29
+[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4
+[debug] Proxy map: {}
+Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535
+yt-dlp is up to date (2021.09.25)
+...
+```
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
+
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore will be closed as `incomplete`.
+
+The templates provided for the Issues should be completed and **not removed**; this helps aid the resolution of the issue.
+
+Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
+
+### Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources.
+
+So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- What your proposed solution would look like
+
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. We often get frustrated by these issues, since the only possible way for us to move forward on them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-vU` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+
+If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--write-pages` and upload the `.dump` files you get [somewhere](https://gist.github.com).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
+
+### Are you using the latest version?
+
+Before reporting any issue, type `yt-dlp -U`. This should report that you're up-to-date. This goes for feature requests as well.
+
+### Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, subscribe to it to be notified when there is any progress. Unless you have something useful to add to the conversation, please refrain from commenting.
+
+Additionally, it is also helpful to see if the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have already been reported in youtube-dl (but not in our issue tracker), links to them can be included in your issue report here.
+
+### Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at [the list of supported options](README.md#usage-and-options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
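+
+For instance, a commonly requested feature, skipping videos that have already been downloaded, is already covered by the existing `--download-archive` option. A minimal example (the archive file name is arbitrary):
+
+    $ yt-dlp --download-archive archive.txt 'https://www.youtube.com/watch?v=BaW_jenozKc'
+
+The ID of every downloaded video is recorded in `archive.txt`, and subsequent runs skip any video already listed there.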
+
+### Have you read and understood the changes between youtube-dl and yt-dlp
+
+There are many changes between youtube-dl and yt-dlp [(changes to default behavior)](README.md#differences-in-default-behavior), and some of the available options behave differently in yt-dlp or have been removed altogether [(list of changes to options)](README.md#deprecated-options). Make sure you have read and understood the differences in the options and how this may impact your downloads before opening an issue.
+
+### Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) into a specific request (e.g. requesting us to check whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: one simple, and one impossible (or extremely complicated).
+
+We are then presented with a very complicated request when the original problem could be solved far more easily, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+
+### Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for Vimeo user videos, White House podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of yt-dlp that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+
+### Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about yt-dlp?
+
+Some bug reports are completely unrelated to yt-dlp and relate to a different, or even the reporter's own, application. Please make sure that you are actually using yt-dlp. If you are using a UI for yt-dlp, report the bug to the maintainer of the actual application providing the UI. In general, if you are unable to provide the verbose log, you should not be opening the issue here.
+
+If the issue is with `youtube-dl` (the project yt-dlp is forked from) and not with yt-dlp, the issue should be raised in the youtube-dl project.
+
+### Are you willing to share account details if needed?
+
+The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is at your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
+
+By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.
+
+While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow:
+
+- Look for people with a `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages.
+- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
+- Change the password after receiving the account back.
+
+### Is the website primarily used for piracy?
+
+We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) of not supporting services that are primarily used for infringing copyright. Additionally, it has been decided not to support porn sites that specialize in fakes. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management).
+
+
+
+
+# DEVELOPER INSTRUCTIONS
+
+Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation).
+
+To run yt-dlp as a developer, you don't need to build anything either. Simply execute
+
+    python3 -m yt_dlp
+
+To run all the available core tests, use:
+
+    python3 devscripts/run_tests.py
+
+See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor-specific test cases.
+
+If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile).
+
+
+## Adding new feature or making overarching changes
+
+Before you start writing code for implementing a new feature, open an issue explaining your feature request and at least one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright.
+
+The same applies to changes to the documentation, code style, or overarching changes to the architecture.
+
+
+## Adding support for a new site
+
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#is-the-website-primarily-used-for-piracy)**. yt-dlp does **not support** such sites, and thus pull requests adding support for them **will be rejected**.
+
+After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
+
+1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork)
+1. Check out the source code with:
+
+        git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git
+
+1. Start a new git branch with
+
+        cd yt-dlp
+        git checkout -b yourextractor
+
+1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`:
+
+    ```python
+    from .common import InfoExtractor
+
+
+    class YourExtractorIE(InfoExtractor):
+        _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+        _TESTS = [{
+            'url': 'https://yourextractor.com/watch/42',
+            'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
+            'info_dict': {
+                # For videos, only the 'id' and 'ext' fields are required to RUN the test:
+                'id': '42',
+                'ext': 'mp4',
+                # Then if the test run fails, it will output the missing/incorrect fields.
+                # Properties can be added as:
+                # * A value, e.g.
+                #     'title': 'Video title goes here',
+                # * MD5 checksum; start the string with 'md5:', e.g.
+                #     'description': 'md5:098f6bcd4621d373cade4e832627b4f6',
+                # * A regular expression; start the string with 're:', e.g.
+                #     'thumbnail': r're:^https?://.*\.jpg$',
+                # * A count of elements in a list; start the string with 'count:', e.g.
+                #     'tags': 'count:10',
+                # * Any Python type, e.g.
+                #     'view_count': int,
+            }
+        }]
+
+        def _real_extract(self, url):
+            video_id = self._match_id(url)
+            webpage = self._download_webpage(url, video_id)
+
+            # TODO more code goes here, for example ...
+            title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': self._og_search_description(webpage),
+                'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
+                # TODO more properties (see yt_dlp/extractor/common.py)
+            }
+    ```
+1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
+1. Run `python3 devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`.
+1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
+1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
+1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+        $ flake8 yt_dlp/extractor/yourextractor.py
+
+1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python.
+1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+
+        $ git add yt_dlp/extractor/_extractors.py
+        $ git add yt_dlp/extractor/yourextractor.py
+        $ git commit -m '[yourextractor] Add extractor'
+        $ git push origin yourextractor
+
+1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+
+In any case, thank you very much for your contributions!
+
+**Tip:** To test extractors that require login information, create a file `test/local_parameters.json` and add `"usenetrc": true` or your `username` & `password` or `cookiefile`/`cookiesfrombrowser` in it:
+```json
+{
+    "username": "your user name",
+    "password": "your password"
+}
+```
+
+## yt-dlp coding conventions
+
+This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
+
+Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd-party media hosters, which is out of your control and tends to change. As an extractor implementer, your task is not only to write code that will extract media links and metadata correctly, but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes, thus keeping old yt-dlp versions working. Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the extractor will remain broken.
+
+
+### Mandatory and optional metafields
+
+For extraction to work, yt-dlp relies on the metadata your extractor extracts and provides to it, expressed as an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
+
+ - `id` (media identifier)
+ - `title` (media title)
+ - `url` (media download URL) or `formats`
+
+The aforementioned metafields are the critical data without which extraction does not make any sense; if any of them fail to be extracted, the extractor is considered completely broken. While all extractors must return a `title`, they must also allow its extraction to be non-fatal.
+
+For pornographic sites, an appropriate `age_limit` must also be returned.
+
+The extractor is allowed to return the info dict without `url` or `formats` in some special cases if it allows the user to extract useful information with `--ignore-no-formats-error`, e.g. when the video is a live stream that has not started yet.
+
+[Any field](yt_dlp/extractor/common.py#L219-L426) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** of situations where sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of the mandatory fields.
+
+#### Example
+
+Say you have some source dictionary `meta` that you've fetched as JSON with an HTTP request and it has a key `summary`:
+
+```python
+meta = self._download_json(url, video_id)
+```
+
+Assume at this point `meta`'s layout is:
+
+```python
+{
+    "summary": "some fancy summary text",
+    "user": {
+        "name": "uploader name"
+    },
+    ...
+}
+```
+
+Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field, you should be prepared for this key to be missing from the `meta` dict, and extract it like:
+
+```python
+description = meta.get('summary')  # correct
+```
+
+and not like:
+
+```python
+description = meta['summary']  # incorrect
+```
+
+The latter will break the extraction process with `KeyError` if `summary` disappears from `meta` at some later time, but with the former approach extraction will just go ahead with `description` set to `None`, which is perfectly fine (remember `None` is equivalent to the absence of data).
+
+
+If the data is nested, do not use `.get` chains, but instead make use of `traverse_obj`.
+
+Considering the above `meta` again, assume you want to extract `["user"]["name"]` and put it in the resulting info dict as `uploader`:
+
+```python
+uploader = traverse_obj(meta, ('user', 'name'))  # correct
+```
+
+and not like:
+
+```python
+uploader = meta['user']['name']  # incorrect
+```
+or
+```python
+uploader = meta.get('user', {}).get('name')  # incorrect
+```
+or
+```python
+uploader = try_get(meta, lambda x: x['user']['name'])  # old utility
+```
+
+
+Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
+
+```python
+description = self._search_regex(
+    r'<span[^>]+id="title"[^>]*>([^<]+)<',
+    webpage, 'description', fatal=False)
+```
+
+With `fatal` set to `False`, if `_search_regex` fails to extract `description`, it will emit a warning and continue extraction.
+
+You can also pass `default=<some fallback value>`, for example:
+
+```python
+description = self._search_regex(
+    r'<span[^>]+id="title"[^>]*>([^<]+)<',
+    webpage, 'description', default=None)
+```
+
+On failure, this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
+
+
+Another thing to remember is not to try to iterate over `None`.
+
+Say you extracted a list of thumbnails into `thumbnail_data` and want to iterate over them:
+
+```python
+thumbnail_data = data.get('thumbnails') or []
+thumbnails = [{
+    'url': item['url'],
+    'height': item.get('h'),
+} for item in thumbnail_data if item.get('url')]  # correct
+```
+
+and not like:
+
+```python
+thumbnail_data = data.get('thumbnails')
+thumbnails = [{
+    'url': item['url'],
+    'height': item.get('h'),
+} for item in thumbnail_data]  # incorrect
+```
+
+In this case, `thumbnail_data` will be `None` if the field was not found, and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `or []` avoids this error and results in `thumbnails` being set to an empty list instead.
+
+Alternatively, this can be further simplified by using `traverse_obj`:
+
+```python
+thumbnails = [{
+    'url': item['url'],
+    'height': item.get('h'),
+} for item in traverse_obj(data, ('thumbnails', lambda _, v: v['url']))]
+```
+
+or, even better,
+
+```python
+thumbnails = traverse_obj(data, ('thumbnails', ..., {'url': 'url', 'height': 'h'}))
+```
+
+### Provide fallbacks
+
+When extracting metadata, try to do so from multiple sources. For example, if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
+
+
+#### Example
+
+Say `meta` from the previous example has a `title` and you are about to extract it like:
+
+```python
+title = meta.get('title')
+```
+
+If `title` disappears from `meta` in the future due to some changes on the hoster's side, the title extraction would fail.
+
+Assume that you have some other source you can extract `title` from, for example the `og:title` HTML meta tag of the `webpage`. In this case you can provide a fallback like:
+
+```python
+title = meta.get('title') or self._og_search_title(webpage)
+```
+
+This code will try to extract from `meta` first, and if that fails, it will try extracting `og:title` from the `webpage`, making the extractor more robust.
+
+
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.
+
+##### Example
+
+Don't capture the id attribute name here since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions, try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
+
+##### Example
+
+Say you need to extract `title` from the following HTML code:
+
+```html
+<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
+```
+
+The code for that task should look similar to:
+
+```python
+title = self._search_regex(  # correct
+    r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
+```
+
+which tolerates potential changes in the `style` attribute's value. Or even better:
+
+```python
+title = self._search_regex(  # correct
+    r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
+    webpage, 'title', group='title')
+```
+
+which also handles single quotes in addition to double quotes.
+
+The code definitely should not look like:
+
+```python
+title = self._search_regex(  # incorrect
+    r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
+    webpage, 'title', group='title')
+```
+
+or even
+
+```python
+title = self._search_regex(  # incorrect
+    r'<span style=".*?" class="title">(.*?)</span>',
+    webpage, 'title', group='title')
+```
+
+Here the presence or absence of other attributes including `style` is irrelevant for the data we need, and so the regex must not depend on it.
+
+
+#### Keep the regular expressions as simple as possible, but no simpler
+
+Since many extractors deal with unstructured data provided by websites, we will often need to use very complex regular expressions. You should try to use the *simplest* regex that can accomplish what you want. In other words, each part of the regex must have a reason for existing. If you can take out a symbol and the functionality does not change, the symbol should not be there.
+
+##### Example
+
+Correct:
+
+```python
+_VALID_URL = r'https?://(?:www\.)?website\.com/(?:[^/]+/){3,4}(?P<display_id>[^/]+)_(?P<id>\d+)'
+```
+
+Incorrect:
+
+```python
+_VALID_URL = r'https?:\/\/(?:www\.)?website\.com\/[^\/]+/[^\/]+/[^\/]+(?:\/[^\/]+)?\/(?P<display_id>[^\/]+)_(?P<id>\d+)'
+```
+
+#### Do not misuse `.` and use the correct quantifiers (`+*?`)
+
+Avoid creating regexes that over-match because of wrong use of quantifiers. Also try to avoid non-greedy matching (`?`) where possible, since it could easily result in [catastrophic backtracking](https://www.regular-expressions.info/catastrophic.html).
+
+Correct:
+
+```python
+title = self._search_regex(r'<span\b[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+title = self._search_regex(r'<span\b.*class="title".*>(.+?)<', webpage, 'title')
+```
+
+
+### Long lines policy
+
+There is a soft limit to keep lines of code under 100 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go up to 120 characters and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable.
+
+For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit.
+
+Conversely, don't unnecessarily split small lines further. As a rule of thumb, if removing the line split keeps the code under 80 characters, it should be a single line.
+
+##### Examples
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Correct:
+
+```python
+uploader = traverse_obj(info, ('uploader', 'name'), ('author', 'fullname'))
+```
+
+Incorrect:
+
+```python
+uploader = traverse_obj(
+    info,
+    ('uploader', 'name'),
+    ('author', 'fullname'))
+```
+
+Correct:
+
+```python
+formats = self._extract_m3u8_formats(
+    m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+    note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information')
+```
+
+Incorrect:
+
+```python
+formats = self._extract_m3u8_formats(m3u8_url,
+                                     video_id,
+                                     'mp4',
+                                     'm3u8_native',
+                                     m3u8_id='hls',
+                                     note='Downloading HD m3u8 information',
+                                     errnote='Unable to download HD m3u8 information')
+```
+
+
+### Quotes
+
+Always use single quotes for strings (even if the string has `'`) and double quotes for docstrings. Use `'''` only for multi-line strings. An exception can be made if a string has multiple single quotes in it and escaping makes it *significantly* harder to read. For f-strings, you can use double quotes on the inside, but avoid f-strings that have too many quotes inside.
+
+
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+
+#### Examples
+
+Correct:
+
+```python
+return {
+    'title': self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title'),
+    # ...some lines of code...
+}
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<h1>([^<]+)</h1>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+# ...some lines of code...
+return {
+    'title': title,
+    # ...some lines of code...
+}
+```
+
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse them into a single expression via a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+    ['og:description', 'description', 'twitter:description'],
+    webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+    self._og_search_description(webpage, default=None)
+    or self._html_search_meta('description', webpage, default=None)
+    or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property` and `_html_search_meta`.
+
+
+### Trailing parentheses
+
+Always move trailing parentheses used for grouping/functions after the last argument. On the other hand, a multi-line literal list/tuple/dict/set should be closed on a new line. Generators and list/dict comprehensions may use either style.
+
+#### Examples
+
+Correct:
+
+```python
+url = traverse_obj(info, (
+    'context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'), list)
+```
+Correct:
+
+```python
+url = traverse_obj(
+    info,
+    ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
+    list)
+```
+
+Incorrect:
+
+```python
+url = traverse_obj(
+    info,
+    ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
+    list
+)
+```
+
+Correct:
+
+```python
+f = {
+    'url': url,
+    'format_id': format_id,
+}
+```
+
+Incorrect:
+
+```python
+f = {'url': url,
+     'format_id': format_id}
+```
+
+Correct:
+
+```python
+formats = [process_formats(f) for f in format_data
+           if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')]
+```
+
+Correct:
+
+```python
+formats = [
+    process_formats(f) for f in format_data
+    if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')
+]
+```
+
+
+### Use convenience conversion and parsing functions
+
+Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `traverse_obj` and `try_call` (which supersede `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.
+
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution` for `resolution` extraction, `parse_duration` for `duration` extraction and `parse_age_limit` for `age_limit` extraction.
+
+Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions.
+
+#### Examples
+
+```python
+description = traverse_obj(response, ('result', 'video', 'summary'), expected_type=str)
+thumbnails = traverse_obj(response, ('result', 'thumbnails', ..., 'url'), expected_type=url_or_none)
+video = traverse_obj(response, ('result', 'video', 0), default={}, expected_type=dict)
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
+
+
+# My pull request is labeled pending-fixes
+
+The `pending-fixes` label is added when there are changes requested to a PR. When the necessary changes are made, the label should be removed. However, despite our best efforts, it may sometimes happen that the maintainer did not see the changes or forgot to remove the label. If your PR is still marked as `pending-fixes` a few days after all requested changes have been made, feel free to ping the maintainer who labeled your issue and ask them to re-review and remove the label.
+ + + + +# EMBEDDING YT-DLP +See [README.md#embedding-yt-dlp](README.md#embedding-yt-dlp) for instructions on how to embed yt-dlp in another Python program diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 index 0000000..6ee3baa --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,602 @@ +pukkandan (owner) +shirt-dev (collaborator) +coletdjnz/colethedj (collaborator) +Ashish0804 (collaborator) +bashonly (collaborator) +Grub4K (collaborator) +h-h-h-h +pauldubois98 +nixxo +GreyAlien502 +kyuyeunk +siikamiika +jbruchon +alexmerkel +glenn-slayden +Unrud +wporr +mariuszskon +ohnonot +samiksome +alxnull +FelixFrog +Zocker1999NET +kurumigi +bbepis +animelover1984/horahoradev +Pccode66 +RobinD42 +hseg +DennyDai +codeasashu +teesid +kevinoconnor7 +damianoamatruda +2ShedsJackson +CXwudi +xtkoba +llacb47 +hheimbuerger +B0pol +lkho +fstirlitz +Lamieur +tsukumijima +Hadi0609 +b5eff52 +craftingmod +tpikonen +tripulse +king-millez +alex-gedeon +hhirtz +louie-github +MinePlayersPE +olifre +rhsmachine/zenerdi0de +nihil-admirari +krichbanana +ohmybahgosh +nyuszika7h +blackjack4494 +pyx +TpmKranz +mzbaulhaque +zackmark29 +mbway +zerodytrash +wesnm +pento +rigstot +dirkf +funniray +Jessecar96 +jhwgh1968 +kikuyan +max-te +nchilada +pgaig +PSlava +stdedos +u-spec-png +Sipherdrakon +kidonng +smege1001 +tandy1000 +IONECarter +capntrips +mrfade +ParadoxGBB +wlritchi +NeroBurner +mahanstreamer +alerikaisattera +Derkades +BunnyHelp +i6t +std-move +Chocobozzz +ouwou +korli +octotherp +CeruleanSky +zootedb0t +chao813 +ChillingPepper +ConquerorDopy +dalanmiller +DigitalDJ +f4pp3rk1ng +gesa +Jules-A +makeworld-the-better-one +MKSherbini +mrx23dot +poschi3 +raphaeldore +renalid +sleaux-meaux +sulyi +tmarki +Vangelis66 +AjaxGb +ajj8 +jakubadamw +jfogelman +timethrow +sarnoud +Bojidarist +18928172992817182/gustaf +nixklai +smplayer-dev +Zirro +CrypticSignal +flashdagger +fractalf +frafra +kaz-us +ozburo +rhendric +sdomi +selfisekai +stanoarn +0xA7404A/Aurora +4a1e2y5 +aarubui +chio0hai +cntrl-s +Deer-Spangle +DEvmIb +Grabien/MaximVol +j54vc1bk +mpeter50 +mrpapersonic +pabs3 +staubichsauger +xenova +Yakabuff +zulaport +ehoogeveen-medweb +PilzAdam +zmousm +iw0nderhow +unit193 +TwoThousandHedgehogs/KathrynElrod +Jertzukka +cypheron +Hyeeji +bwildenhain +C0D3D3V +kebianizao +Lapin0t +abdullah-if +DavidSkrundz +mkubecek +raleeper +YuenSzeHong +Sematre +jaller94 +r5d +julien-hadleyjack +git-anony-mouse +mdawar +trassshhub +foghawk +k3ns1n +teridon +mozlima +timendum +ischmidt20 +CreaValix +sian1468 +arkamar +hyano +KiberInfinity +tejing1 +Bricio +lazypete365 +Aniruddh-J +blackgear +CplPwnies +cyberfox1691 +FestplattenSchnitzel +hatienl0i261299 +iphoting +jakeogh +lukasfink1 +lyz-code +marieell +mdpauley +Mipsters +mxmehl +ofkz +P-reducible +pycabbage +regarten +Ronnnny +schn0sch +s0u1h +MrRawes +cffswb +danielyli +1-Byte +mehq +dzek69 +aaearon +panatexxa +kmark +un-def +goggle +Soebb +Fam0r +bohwaz +dodrian +vvto33 +ca-za +connercsbn +diegorodriguezv +ekangmonyet +elyse0 +evansp +GiedriusS +HE7086 +JordanWeatherby +m4tu4g +MarwenDallel +nevack +putnam +rand-net +vertan +Wikidepia +Yipten +moench-tegeder +christoph-heinrich +HobbyistDev +LunarFang416 +sbor23 +aurelg +adamanldo +gamer191 +vkorablin +Burve +mnn +ZhymabekRoman +mozbugbox +aejdl +ping +sqrtNOT +bubbleguuum +darkxex +miseran +StefanLobbenmeier +crazymoose77756 +nomevi +Brett824 +pingiun +dosy4ev +EhtishamSabir +Ferdi265 +FirefoxMetzger +ftk +lamby +llamasblade +lockmatrix +misaelaguayo +odo2063 +pritam20ps05 +scy +sheerluck +AxiosDeminence +DjesonPV +eren-kemer 
+freezboltz +Galiley +haobinliang +Mehavoid +winterbird-code +yashkc2025 +aldoridhoni +jacobtruman +masta79 +palewire +cgrigis +DavidH-2022 +dfaker +jackyyf +ohaiibuzzle +SamantazFox +shreyasminocha +tejasa97 +xenov +satan1st +0xGodspeed +5736d79 +587021c +basrieter +Bobscorn +CNugteren +columndeeply +DoubleCouponDay +Fabi019 +GautamMKGarg +itachi-19 +jeroenj +josanabr +LiviaMedeiros +nikita-moor +snapdgn +SuperSonicHub1 +tannertechnology +Timendum +tobi1805 +TokyoBlackHole +ajayyy +Alienmaster +bsun0000 +changren-wcr +ClosedPort22 +CrankDatSouljaBoy +cruel-efficiency +endotronic +Generator +gibson042 +How-Bout-No +invertico +jahway603 +jwoglom +lksj +megapro17 +mlampe +MrOctopus +nosoop +puc9 +sashashura +schnusch +SG5 +the-marenga +tkgmomosheep +vitkhab +glensc +synthpop123 +tntmod54321 +milkknife +Bnyro +CapacitorSet +stelcodes +skbeh +muddi900 +digitall +chengzhicn +mexus +JChris246 +redraskal +Spicadox +barsnick +docbender +KurtBestor +Chrissi2812 +FrederikNS +gschizas +JC-Chung +mzhou +OndrejBakan +ab4cbef +aionescu +amra +ByteDream +carusocr +chexxor +felixonmars +FrankZ85 +FriedrichRehren +gregsadetsky +LeoniePhiline +LowSuggestion912 +Matumo +OIRNOIR +OMEGARAZER +oxamun +pmitchell86 +qbnu +qulaz +rebane2001 +road-master +rohieb +sdht0 +seproDev +Hill-98 +LXYan2333 +mushbite +venkata-krishnas +7vlad7 +alexklapheke +arobase-che +bepvte +bergoid +blmarket +brandon-dacrib +c-basalt +CoryTibbettsDev +Cyberes +D0LLYNH0 +danog +DataGhost +falbrechtskirchinger +foreignBlade +garret1317 +hasezoey +hoaluvn +ItzMaxTV +ivanskodje +jo-nike +kangalio +linsui +makew0rld +menschel +mikf +mrscrapy +NDagestad +Neurognostic +NextFire +nick-cd +permunkle +pzhlkj6612 +ringus1 +rjy +Schmoaaaaah +sjthespian +theperfectpunk +toomyzoom +truedread +TxI5 +unbeatable-101 +vampirefrog +vidiot720 +viktor-enzell +zhgwn +barthelmannk +berkanteber +OverlordQ +rexlambert22 +Ti4eeT4e +AmanSal1 +bbilly1 +meliber +nnoboa +rdamas +RfadnjdExt +urectanc +nao20010128nao/Lesmiscore +04-pasha-04 +aaruni96 +aky-01 +AmirAflak +ApoorvShah111 +at-wat +davinkevin +demon071 +denhotte +FinnRG +fireattack +Frankgoji +GD-Slime +hatsomatt +ifan-t +kshitiz305 +kylegustavo +mabdelfattah +nathantouze +niemands +Rajeshwaran2001 +RedDeffender +Rohxn16 +sb0stn +SevenLives +simon300000 +snixon +soundchaser128 +szabyg +trainman261 +trislee +wader +Yalab7 +zhallgato +zhong-yiyu +Zprokkel +AS6939 +drzraf +handlerug +jiru +madewokherd +xofe +awalgarg +midnightveil +naginatana +Riteo +1100101 +aniolpages +bartbroere +CrendKing +Esokrates +HitomaruKonpaku +LoserFox +peci1 +saintliao +shubhexists +SirElderling +almx +elivinsky +starius +TravisDupes +amir16yp +Fymyte +Ganesh910 +hashFactory +kclauhk +Kyraminol +lstrojny +middlingphys +NickCis +nicodato +prettykool +S-Aarab +sonmezberkay +TSRBerry +114514ns +agibson-fl +alard +alien-developers +antonkesy +ArnauvGilotra +Arthurszzz +Bibhav48 +Bl4Cc4t +boredzo +Caesim404 +chkuendig +chtk +Danish-H +dasidiot +diman8 +divStar +DmitryScaletta +feederbox826 +gmes78 +gonzalezjo +hui1601 +infanf +jazz1611 +jingtra +jkmartindale +johnvictorfs +llistochek +marcdumais +martinxyz +michal-repo +mrmedieval +nbr23 +Nicals +Noor-5 +NurTasin +pompos02 +Pranaxcau +pwaldhauer +RaduManole +RalphORama +rrgomes +ruiminggu +rvsit +sefidel +shmohawk +Snack-X +src-tinkerer +stilor +syntaxsurge +t-nil +ufukk +vista-narvas +x11x +xpadev-net +Xpl0itU +YoshichikaAAA +zhijinwuu diff --git a/Changelog.md b/Changelog.md new file mode 100644 index 0000000..45a9cef --- /dev/null +++ b/Changelog.md @@ -0,0 +1,4280 @@ +# Changelog + 
+<!-- +# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master +--> + +### 2024.03.10 + +#### Core changes +- [Add `--compat-options 2023`](https://github.com/yt-dlp/yt-dlp/commit/3725b4f0c93ca3943e6300013a9670e4ab757fda) ([#9084](https://github.com/yt-dlp/yt-dlp/issues/9084)) by [Grub4K](https://github.com/Grub4K) (With fixes in [ffff1bc](https://github.com/yt-dlp/yt-dlp/commit/ffff1bc6598fc7a9258e51bc153cab812467f9f9) by [pukkandan](https://github.com/pukkandan)) +- [Create `ydl._request_director` when needed](https://github.com/yt-dlp/yt-dlp/commit/069b2aedae2279668b6051627a81fc4fbd9c146a) by [pukkandan](https://github.com/pukkandan) (With fixes in [dbd8b1b](https://github.com/yt-dlp/yt-dlp/commit/dbd8b1bff9afd8f05f982bcd52c20bc173c266ca) by [Grub4k](https://github.com/Grub4k)) +- [Don't select storyboard formats as fallback](https://github.com/yt-dlp/yt-dlp/commit/d63eae7e7ffb1f3e733e552b9e5e82355bfba214) by [bashonly](https://github.com/bashonly) +- [Handle `--load-info-json` format selection errors](https://github.com/yt-dlp/yt-dlp/commit/263a4b55ac17a796e8991ca8d2d86a3c349f8a60) ([#9392](https://github.com/yt-dlp/yt-dlp/issues/9392)) by [bashonly](https://github.com/bashonly) +- [Warn user when not launching through shell on Windows](https://github.com/yt-dlp/yt-dlp/commit/6a6cdcd1824a14e3b336332c8f31f65497b8c4b8) ([#9250](https://github.com/yt-dlp/yt-dlp/issues/9250)) by [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **cookies** + - [Fix `--cookies-from-browser` for `snap` Firefox](https://github.com/yt-dlp/yt-dlp/commit/cbed249aaa053a3f425b9bafc97f8dbd71c44487) ([#9016](https://github.com/yt-dlp/yt-dlp/issues/9016)) by [Grub4K](https://github.com/Grub4K) + - [Fix `--cookies-from-browser` with macOS Firefox profiles](https://github.com/yt-dlp/yt-dlp/commit/85b33f5c163f60dbd089a6b9bc2ba1366d3ddf93) ([#8909](https://github.com/yt-dlp/yt-dlp/issues/8909)) by [RalphORama](https://github.com/RalphORama) + - [Improve error message for Windows `--cookies-from-browser chrome` issue](https://github.com/yt-dlp/yt-dlp/commit/2792092afd367e39251ace1fb2819c855ab8919f) ([#9080](https://github.com/yt-dlp/yt-dlp/issues/9080)) by [Grub4K](https://github.com/Grub4K) +- **plugins**: [Handle `PermissionError`](https://github.com/yt-dlp/yt-dlp/commit/9a8afadd172b7cab143f0049959fa64973589d94) ([#9229](https://github.com/yt-dlp/yt-dlp/issues/9229)) by [pukkandan](https://github.com/pukkandan), [syntaxsurge](https://github.com/syntaxsurge) +- **utils** + - [Improve `repr` of `DateRange`, `match_filter_func`](https://github.com/yt-dlp/yt-dlp/commit/45491a2a30da4d1723cfa9288cb664813bb09afb) by [pukkandan](https://github.com/pukkandan) + - `traverse_obj`: [Support `xml.etree.ElementTree.Element`](https://github.com/yt-dlp/yt-dlp/commit/ffbd4f2a02fee387ea5e0a267ce32df5259111ac) ([#8911](https://github.com/yt-dlp/yt-dlp/issues/8911)) by [Grub4K](https://github.com/Grub4K) +- **webvtt**: [Don't parse single fragment files](https://github.com/yt-dlp/yt-dlp/commit/f24e44e8cbd88ce338d52f594a19330f64d38b50) ([#9034](https://github.com/yt-dlp/yt-dlp/issues/9034)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- [Migrate commonly plural fields to lists](https://github.com/yt-dlp/yt-dlp/commit/104a7b5a46dc1805157fb4cc11c05876934d37c1) ([#8917](https://github.com/yt-dlp/yt-dlp/issues/8917)) by [llistochek](https://github.com/llistochek), [pukkandan](https://github.com/pukkandan) (With fixes in 
[b136e2a](https://github.com/yt-dlp/yt-dlp/commit/b136e2af341f7a88028aea4c5cd50efe2fa9b182) by [bashonly](https://github.com/bashonly)) +- [Support multi-period MPD streams](https://github.com/yt-dlp/yt-dlp/commit/4ce57d3b873c2887814cbec03d029533e82f7db5) ([#6654](https://github.com/yt-dlp/yt-dlp/issues/6654)) by [alard](https://github.com/alard), [pukkandan](https://github.com/pukkandan) +- **abematv** + - [Fix extraction with cache](https://github.com/yt-dlp/yt-dlp/commit/c51316f8a69fbd0080f2720777d42ab438e254a3) ([#8895](https://github.com/yt-dlp/yt-dlp/issues/8895)) by [sefidel](https://github.com/sefidel) + - [Support login for playlists](https://github.com/yt-dlp/yt-dlp/commit/8226a3818f804478c756cf460baa9bf3a3b062a5) ([#8901](https://github.com/yt-dlp/yt-dlp/issues/8901)) by [sefidel](https://github.com/sefidel) +- **adn** + - [Add support for German site](https://github.com/yt-dlp/yt-dlp/commit/5eb1458be4767385a9bf1d570ff08e46100cbaa2) ([#8708](https://github.com/yt-dlp/yt-dlp/issues/8708)) by [infanf](https://github.com/infanf) + - [Improve auth error handling](https://github.com/yt-dlp/yt-dlp/commit/9526b1f179d19f75284eceaa5e0ee381af18cf19) ([#9068](https://github.com/yt-dlp/yt-dlp/issues/9068)) by [infanf](https://github.com/infanf) +- **aenetworks**: [Rating should be optional for AP extraction](https://github.com/yt-dlp/yt-dlp/commit/014cb5774d7afe624b6eb4e07f7be924b9e5e186) ([#9005](https://github.com/yt-dlp/yt-dlp/issues/9005)) by [agibson-fl](https://github.com/agibson-fl) +- **altcensored**: channel: [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/e28e135d6fd6a430fed3e20dfe1a8c8bbc5f9185) ([#9297](https://github.com/yt-dlp/yt-dlp/issues/9297)) by [marcdumais](https://github.com/marcdumais) +- **amadeustv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e641aab7a61df7406df60ebfe0c77bd5186b2b41) ([#8744](https://github.com/yt-dlp/yt-dlp/issues/8744)) by [ArnauvGilotra](https://github.com/ArnauvGilotra) +- **ant1newsgrembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1ed5ee2f045f717e814f84ba461dadc58e712266) ([#9191](https://github.com/yt-dlp/yt-dlp/issues/9191)) by [seproDev](https://github.com/seproDev) +- **archiveorg**: [Fix format URL encoding](https://github.com/yt-dlp/yt-dlp/commit/3894ab9574748188bbacbd925a3971eda6fa2bb0) ([#9279](https://github.com/yt-dlp/yt-dlp/issues/9279)) by [bashonly](https://github.com/bashonly) +- **ard** + - mediathek + - [Revert to using old id](https://github.com/yt-dlp/yt-dlp/commit/b6951271ac014761c9c317b9cecd5e8e139cfa7c) ([#8916](https://github.com/yt-dlp/yt-dlp/issues/8916)) by [Grub4K](https://github.com/Grub4K) + - [Support cookies to verify age](https://github.com/yt-dlp/yt-dlp/commit/c099ec9392b0283dde34b290d1a04158ad8eb882) ([#9037](https://github.com/yt-dlp/yt-dlp/issues/9037)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- **art19**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/999ea80beb053491089d256104c4188aced3110f) ([#9099](https://github.com/yt-dlp/yt-dlp/issues/9099)) by [seproDev](https://github.com/seproDev) +- **artetv**: [Separate closed captions](https://github.com/yt-dlp/yt-dlp/commit/393b487a4ea391c44e811505ec98531031d7e81e) ([#8231](https://github.com/yt-dlp/yt-dlp/issues/8231)) by [Nicals](https://github.com/Nicals), [seproDev](https://github.com/seproDev) +- **asobichannel**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/12f042740550c06552819374e2251deb7a519bab) ([#8700](https://github.com/yt-dlp/yt-dlp/issues/8700)) by 
[Snack-X](https://github.com/Snack-X) +- **bigo**: [Fix JSON extraction](https://github.com/yt-dlp/yt-dlp/commit/85a2d07c1f82c2082b568963d1c32ad3fc848f61) ([#8893](https://github.com/yt-dlp/yt-dlp/issues/8893)) by [DmitryScaletta](https://github.com/DmitryScaletta) +- **bilibili** + - [Add referer header and fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1713c882730a928ac344c099874d2093fc2c8b51) ([#8832](https://github.com/yt-dlp/yt-dlp/issues/8832)) by [SirElderling](https://github.com/SirElderling) (With fixes in [f1570ab](https://github.com/yt-dlp/yt-dlp/commit/f1570ab84d5f49564256c620063d2d3e9ed4acf0) by [TobiX](https://github.com/TobiX)) + - [Support `--no-playlist`](https://github.com/yt-dlp/yt-dlp/commit/e439693f729daf6fb15457baea1bca10ef5da34d) ([#9139](https://github.com/yt-dlp/yt-dlp/issues/9139)) by [c-basalt](https://github.com/c-basalt) +- **bilibilisearch**: [Set cookie to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ffa017cfc5973b265c92248546fcf5020dc43eaf) ([#9119](https://github.com/yt-dlp/yt-dlp/issues/9119)) by [c-basalt](https://github.com/c-basalt) +- **biliintl**: [Fix and improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/cf6413e840476c15e5b166dc2f7cc2a90a4a9aad) ([#7077](https://github.com/yt-dlp/yt-dlp/issues/7077)) by [dirkf](https://github.com/dirkf), [HobbyistDev](https://github.com/HobbyistDev), [itachi-19](https://github.com/itachi-19), [seproDev](https://github.com/seproDev) +- **boosty**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/540b68298192874c75ad5ee4589bed64d02a7d55) ([#9144](https://github.com/yt-dlp/yt-dlp/issues/9144)) by [un-def](https://github.com/un-def) +- **ccma**: [Extract 1080p DASH formats](https://github.com/yt-dlp/yt-dlp/commit/4253e3b7f483127bd812bdac02466f4a5b47ff34) ([#9130](https://github.com/yt-dlp/yt-dlp/issues/9130)) by [seproDev](https://github.com/seproDev) +- **cctv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/6ad11fef65474bcf70f3a8556850d93c141e44a2) ([#9325](https://github.com/yt-dlp/yt-dlp/issues/9325)) by [src-tinkerer](https://github.com/src-tinkerer) +- **chzzk** + - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/ba6b0c8261e9f0a6373885736ff90a89dd1fb614) ([#8887](https://github.com/yt-dlp/yt-dlp/issues/8887)) by [DmitryScaletta](https://github.com/DmitryScaletta) + - live: [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/804f2366117b7065552a1c3cddb9ec19b688a5c1) ([#9309](https://github.com/yt-dlp/yt-dlp/issues/9309)) by [hui1601](https://github.com/hui1601) +- **cineverse**: [Detect when login required](https://github.com/yt-dlp/yt-dlp/commit/fc2cc626f07328a6c71b5e21853e4cfa7b1e6256) ([#9081](https://github.com/yt-dlp/yt-dlp/issues/9081)) by [garret1317](https://github.com/garret1317) +- **cloudflarestream** + - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/4d9dc0abe24ad5d9d22a16f40fc61137dcd103f7) ([#9007](https://github.com/yt-dlp/yt-dlp/issues/9007)) by [Bibhav48](https://github.com/Bibhav48) + - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/f3d5face83f948c24bcb91e06d4fa6e8622d7d79) ([#9280](https://github.com/yt-dlp/yt-dlp/issues/9280)) by [bashonly](https://github.com/bashonly) + - [Improve embed detection](https://github.com/yt-dlp/yt-dlp/commit/464c919ea82aefdf35f138a1ab2dd0bb8fb7fd0e) ([#9287](https://github.com/yt-dlp/yt-dlp/issues/9287)) by [bashonly](https://github.com/bashonly) +- **cloudycdn, lsm**: [Add 
extractors](https://github.com/yt-dlp/yt-dlp/commit/5dda3b291f59f388f953337e9fb09a94b64aaf34) ([#8643](https://github.com/yt-dlp/yt-dlp/issues/8643)) by [Caesim404](https://github.com/Caesim404) +- **cnbc**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/998dffb5a2343ec709b3d6bbf2bf019649080239) ([#8741](https://github.com/yt-dlp/yt-dlp/issues/8741)) by [gonzalezjo](https://github.com/gonzalezjo), [Noor-5](https://github.com/Noor-5), [ruiminggu](https://github.com/ruiminggu), [seproDev](https://github.com/seproDev), [zhijinwuu](https://github.com/zhijinwuu) +- **craftsy**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/96f3924bac174f2fd401f86f78e77d7e0c5ee008) ([#9384](https://github.com/yt-dlp/yt-dlp/issues/9384)) by [bashonly](https://github.com/bashonly) +- **crooksandliars**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03536126d32bd861e38536371f0cd5f1b71dcb7a) ([#9192](https://github.com/yt-dlp/yt-dlp/issues/9192)) by [seproDev](https://github.com/seproDev) +- **crtvg**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/785ab1af7f131e73444634ad57b39478651a43d3) ([#9404](https://github.com/yt-dlp/yt-dlp/issues/9404)) by [Xpl0itU](https://github.com/Xpl0itU) +- **dailymotion**: [Support search](https://github.com/yt-dlp/yt-dlp/commit/11ffa92a61e5847b3dfa8975f91ecb3ac2178841) ([#8292](https://github.com/yt-dlp/yt-dlp/issues/8292)) by [drzraf](https://github.com/drzraf), [seproDev](https://github.com/seproDev) +- **douyin**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9ff946645568e71046487571eefa9cb524a5189b) ([#9239](https://github.com/yt-dlp/yt-dlp/issues/9239)) by [114514ns](https://github.com/114514ns), [bashonly](https://github.com/bashonly) (With fixes in [e546e5d](https://github.com/yt-dlp/yt-dlp/commit/e546e5d3b33a50075e574a2e7b8eda7ea874d21e) by [bashonly](https://github.com/bashonly)) +- **duboku**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d4187da90a6b85f4ebae4bb07693cc9b412d75) ([#9161](https://github.com/yt-dlp/yt-dlp/issues/9161)) by [DmitryScaletta](https://github.com/DmitryScaletta) +- **dumpert**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/eedb38ce4093500e19279d50b708fb9c18bf4dbf) ([#9320](https://github.com/yt-dlp/yt-dlp/issues/9320)) by [rvsit](https://github.com/rvsit) +- **elementorembed**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6171b050d70435008e64fa06aa6f19c4e5bec75f) ([#8948](https://github.com/yt-dlp/yt-dlp/issues/8948)) by [pompos02](https://github.com/pompos02), [seproDev](https://github.com/seproDev) +- **eporner**: [Extract AV1 formats](https://github.com/yt-dlp/yt-dlp/commit/96d0f8c1cb8aec250c5614bfde6b5fb95f10819b) ([#9028](https://github.com/yt-dlp/yt-dlp/issues/9028)) by [michal-repo](https://github.com/michal-repo) +- **errjupiter** + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a514cc2feb1c3b265b19acab11487acad8bb3ab0) ([#8549](https://github.com/yt-dlp/yt-dlp/issues/8549)) by [glensc](https://github.com/glensc) + - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/80ed8bdeba5a945f127ef9ab055a4823329a1210) ([#9218](https://github.com/yt-dlp/yt-dlp/issues/9218)) by [glensc](https://github.com/glensc) +- **facebook** + - [Add new ID format](https://github.com/yt-dlp/yt-dlp/commit/cf9af2c7f1fedd881a157b3fbe725e5494b00924) ([#3824](https://github.com/yt-dlp/yt-dlp/issues/3824)) by [kclauhk](https://github.com/kclauhk), [Wikidepia](https://github.com/Wikidepia) + - [Improve 
extraction](https://github.com/yt-dlp/yt-dlp/commit/2e30b5567b5c6113d46b39163db5b044aea8667e) by [jingtra](https://github.com/jingtra), [ringus1](https://github.com/ringus1) + - [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/3c4d3ee491b0ec22ed3cade51d943d3d27141ba7) ([#9060](https://github.com/yt-dlp/yt-dlp/issues/9060)) by [kclauhk](https://github.com/kclauhk) + - [Set format HTTP chunk size](https://github.com/yt-dlp/yt-dlp/commit/5b68c478fb0b93ea6b8fac23f50e12217fa063db) ([#9058](https://github.com/yt-dlp/yt-dlp/issues/9058)) by [bashonly](https://github.com/bashonly), [kclauhk](https://github.com/kclauhk) + - [Support events](https://github.com/yt-dlp/yt-dlp/commit/9b5efaf86b99a2664fff9fc725d275f766c3221d) ([#9055](https://github.com/yt-dlp/yt-dlp/issues/9055)) by [kclauhk](https://github.com/kclauhk) + - [Support permalink URLs](https://github.com/yt-dlp/yt-dlp/commit/87286e93af949c4e6a0f8ba34af6a1ab5aa102b6) ([#9061](https://github.com/yt-dlp/yt-dlp/issues/9061)) by [kclauhk](https://github.com/kclauhk) + - ads: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a40b0070c2a00d3ed839897462171a82323aa875) ([#8870](https://github.com/yt-dlp/yt-dlp/issues/8870)) by [kclauhk](https://github.com/kclauhk) +- **flextv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/4f043479090dc8a7e06e0bb53691e5414320dfb2) ([#9178](https://github.com/yt-dlp/yt-dlp/issues/9178)) by [DmitryScaletta](https://github.com/DmitryScaletta) +- **floatplane**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/9cd90447907a59c8a2727583f4a755fb23ed8cd3) ([#8934](https://github.com/yt-dlp/yt-dlp/issues/8934)) by [chtk](https://github.com/chtk) +- **francetv** + - [Fix DAI livestreams](https://github.com/yt-dlp/yt-dlp/commit/e4fbe5f886a6693f2466877c12e99c30c5442ace) ([#9380](https://github.com/yt-dlp/yt-dlp/issues/9380)) by [bashonly](https://github.com/bashonly) + - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/9749ac7fecbfda391afbadf2870797ce0e382622) ([#9333](https://github.com/yt-dlp/yt-dlp/issues/9333)) by [bashonly](https://github.com/bashonly) + - [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ede624d1db649f5a4b61f8abbb746f365322de27) ([#9347](https://github.com/yt-dlp/yt-dlp/issues/9347)) by [bashonly](https://github.com/bashonly) +- **funk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cd0443fb14e2ed805abb02792473457553a123d1) ([#9194](https://github.com/yt-dlp/yt-dlp/issues/9194)) by [seproDev](https://github.com/seproDev) +- **generic**: [Follow https redirects properly](https://github.com/yt-dlp/yt-dlp/commit/c8c9039e640495700f76a13496e3418bdd4382ba) ([#9121](https://github.com/yt-dlp/yt-dlp/issues/9121)) by [seproDev](https://github.com/seproDev) +- **getcourseru**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4310b6650eeb5630295f4591b37720877878c57a) ([#8873](https://github.com/yt-dlp/yt-dlp/issues/8873)) by [divStar](https://github.com/divStar), [seproDev](https://github.com/seproDev) +- **gofile**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/77c2472ca1ef9050a66aa68bc5fa1bee88706c66) ([#9074](https://github.com/yt-dlp/yt-dlp/issues/9074)) by [jazz1611](https://github.com/jazz1611) +- **googledrive**: [Fix source file extraction](https://github.com/yt-dlp/yt-dlp/commit/5498729c59b03a9511c64552da3ba2f802166f8d) ([#8990](https://github.com/yt-dlp/yt-dlp/issues/8990)) by [jazz1611](https://github.com/jazz1611) +- **goplay**: [Fix 
extractor](https://github.com/yt-dlp/yt-dlp/commit/7e90e34fa4617b53f8c8a9e69f460508cb1f51b0) ([#6654](https://github.com/yt-dlp/yt-dlp/issues/6654)) by [alard](https://github.com/alard) +- **gopro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4a07a455bbf7acf87550053bbba949c828e350ba) ([#9019](https://github.com/yt-dlp/yt-dlp/issues/9019)) by [stilor](https://github.com/stilor) +- **ilpost**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aa5dcc4ee65916a36cbe1b1b5b29b9110c3163ed) ([#9001](https://github.com/yt-dlp/yt-dlp/issues/9001)) by [CapacitorSet](https://github.com/CapacitorSet) +- **jiosaavnsong**: [Support more bitrates](https://github.com/yt-dlp/yt-dlp/commit/5154dc0a687528f995cde22b5ff63f82c740e98a) ([#8834](https://github.com/yt-dlp/yt-dlp/issues/8834)) by [alien-developers](https://github.com/alien-developers), [bashonly](https://github.com/bashonly) +- **kukululive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/20cdad5a2c0499d5a6746f5466a2ab0c97b75884) ([#8877](https://github.com/yt-dlp/yt-dlp/issues/8877)) by [DmitryScaletta](https://github.com/DmitryScaletta) +- **lefigarovideoembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9401736fd08767c58af45a1e36ff5929c5fa1ac9) ([#9198](https://github.com/yt-dlp/yt-dlp/issues/9198)) by [seproDev](https://github.com/seproDev) +- **linkedin**: [Fix metadata and extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/017adb28e7fe7b8c8fc472332d86740f31141519) ([#9056](https://github.com/yt-dlp/yt-dlp/issues/9056)) by [barsnick](https://github.com/barsnick) +- **magellantv**: [Support episodes](https://github.com/yt-dlp/yt-dlp/commit/3dc9232e1aa58fe3c2d8cafb50e8162d6f0e891e) ([#9199](https://github.com/yt-dlp/yt-dlp/issues/9199)) by [seproDev](https://github.com/seproDev) +- **magentamusik**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5e2e24b2c5795756d81785b06b10723ddb6db7b2) ([#7790](https://github.com/yt-dlp/yt-dlp/issues/7790)) by [pwaldhauer](https://github.com/pwaldhauer), [seproDev](https://github.com/seproDev) +- **medaltv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/02e343f6ef6d7b3f9087ff69e4a1db0b4b4a5c5d) ([#9098](https://github.com/yt-dlp/yt-dlp/issues/9098)) by [Danish-H](https://github.com/Danish-H) +- **mlbarticle**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/50e06e21a68e336198198bda332b8e7d2314f201) ([#9021](https://github.com/yt-dlp/yt-dlp/issues/9021)) by [HobbyistDev](https://github.com/HobbyistDev) +- **motherless**: [Support uploader playlists](https://github.com/yt-dlp/yt-dlp/commit/9f1e9dab21bbe651544c8f4663b0e615dc450e4d) ([#8994](https://github.com/yt-dlp/yt-dlp/issues/8994)) by [dasidiot](https://github.com/dasidiot) +- **mujrozhlas**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4170b3d7120e06db3391eef39c5add18a1ddf2c3) ([#9306](https://github.com/yt-dlp/yt-dlp/issues/9306)) by [bashonly](https://github.com/bashonly) +- **mx3**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/5a63454b3637b3603434026cddfeac509218b90e) ([#8736](https://github.com/yt-dlp/yt-dlp/issues/8736)) by [martinxyz](https://github.com/martinxyz) +- **naver**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/a281beba8d8f007cf220f96dd1d9412bb070c7d8) ([#8883](https://github.com/yt-dlp/yt-dlp/issues/8883)) by [seproDev](https://github.com/seproDev) +- **nebula**: [Support podcasts](https://github.com/yt-dlp/yt-dlp/commit/0de09c5b9ed619d4a93d7c451c6ddff0381de808) ([#9140](https://github.com/yt-dlp/yt-dlp/issues/9140)) by 
[c-basalt](https://github.com/c-basalt), [seproDev](https://github.com/seproDev) +- **nerdcubedfeed**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/29a74a6126101aabaa1726ae41b1ca55cf26e7a7) ([#9269](https://github.com/yt-dlp/yt-dlp/issues/9269)) by [seproDev](https://github.com/seproDev) +- **newgrounds** + - [Fix login and clean up extraction](https://github.com/yt-dlp/yt-dlp/commit/0fcefb92f3ebfc5cada19c1e85a715f020d0f333) ([#9356](https://github.com/yt-dlp/yt-dlp/issues/9356)) by [Grub4K](https://github.com/Grub4K), [mrmedieval](https://github.com/mrmedieval) + - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3e083191cdc34dd8c482da9a9b4bc682f824cb9d) ([#9046](https://github.com/yt-dlp/yt-dlp/issues/9046)) by [u-spec-png](https://github.com/u-spec-png) +- **nfb**: [Add support for onf.ca and series](https://github.com/yt-dlp/yt-dlp/commit/4b8b0dded8c65cd5b2ab2e858058ba98c9bf49ff) ([#8997](https://github.com/yt-dlp/yt-dlp/issues/8997)) by [bashonly](https://github.com/bashonly), [rrgomes](https://github.com/rrgomes) +- **nhkradiru**: [Extract extended description](https://github.com/yt-dlp/yt-dlp/commit/4392447d9404e3c25cfeb8f5bdfff31b0448da39) ([#9162](https://github.com/yt-dlp/yt-dlp/issues/9162)) by [garret1317](https://github.com/garret1317) +- **nhkradirulive**: [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/5af1f19787f7d652fce72dd3ab9536cdd980fe85) ([#8956](https://github.com/yt-dlp/yt-dlp/issues/8956)) by [garret1317](https://github.com/garret1317) +- **niconico** + - [Remove legacy danmaku extraction](https://github.com/yt-dlp/yt-dlp/commit/974d444039c8bbffb57265c6792cd52d169fe1b9) ([#9209](https://github.com/yt-dlp/yt-dlp/issues/9209)) by [pzhlkj6612](https://github.com/pzhlkj6612) + - [Support DMS formats](https://github.com/yt-dlp/yt-dlp/commit/aa13a8e3dd3b698cc40ec438988b1ad834e11a41) ([#9282](https://github.com/yt-dlp/yt-dlp/issues/9282)) by [pzhlkj6612](https://github.com/pzhlkj6612), [xpadev-net](https://github.com/xpadev-net) (With fixes in [40966e8](https://github.com/yt-dlp/yt-dlp/commit/40966e8da27bbf770dacf9be9363fcc3ad72cc9f) by [pzhlkj6612](https://github.com/pzhlkj6612)) +- **ninaprotocol**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/62c65bfaf81e04e6746f6fdbafe384eb3edddfbc) ([#8946](https://github.com/yt-dlp/yt-dlp/issues/8946)) by [RaduManole](https://github.com/RaduManole), [seproDev](https://github.com/seproDev) +- **ninenews**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/43694ce13c5a9f1afca8b02b8b2b9b1576d6503d) ([#8840](https://github.com/yt-dlp/yt-dlp/issues/8840)) by [SirElderling](https://github.com/SirElderling) +- **nova**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c168d8791d0974a8a8fcb3b4a4bc2d830df51622) ([#9221](https://github.com/yt-dlp/yt-dlp/issues/9221)) by [seproDev](https://github.com/seproDev) +- **ntvru**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7a29cbbd5fd7363e7e8535ee1506b7052465d13f) ([#9276](https://github.com/yt-dlp/yt-dlp/issues/9276)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf) +- **nuum**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/acaf806c15f0a802ba286c23af02a10cf4bd4731) ([#8868](https://github.com/yt-dlp/yt-dlp/issues/8868)) by [DmitryScaletta](https://github.com/DmitryScaletta), [seproDev](https://github.com/seproDev) +- **nytimes** + - [Extract timestamp](https://github.com/yt-dlp/yt-dlp/commit/05420227aaab60a39c0f9ade069c5862be36b1fa) 
([#9142](https://github.com/yt-dlp/yt-dlp/issues/9142)) by [SirElderling](https://github.com/SirElderling) + - [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/07256b9fee23960799024b95d5972abc7174aa81) ([#9075](https://github.com/yt-dlp/yt-dlp/issues/9075)) by [SirElderling](https://github.com/SirElderling) +- **onefootball**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/644738ddaa45428cb0babd41ead22454e5a2545e) ([#9222](https://github.com/yt-dlp/yt-dlp/issues/9222)) by [seproDev](https://github.com/seproDev) +- **openrec**: [Pass referer for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f591e605dfee4085ec007d6d056c943cbcacc429) ([#9253](https://github.com/yt-dlp/yt-dlp/issues/9253)) by [fireattack](https://github.com/fireattack) +- **orf**: on: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a0d50aabc5462aee302bd3f2663d3a3554875789) ([#9113](https://github.com/yt-dlp/yt-dlp/issues/9113)) by [HobbyistDev](https://github.com/HobbyistDev) +- **patreon**: [Fix embedded HLS extraction](https://github.com/yt-dlp/yt-dlp/commit/f0e8bc7c60b61fe18b63116c975609d76b904771) ([#8993](https://github.com/yt-dlp/yt-dlp/issues/8993)) by [johnvictorfs](https://github.com/johnvictorfs) +- **peertube**: [Update instances](https://github.com/yt-dlp/yt-dlp/commit/35d96982f1033e36215d323317981ee17e8ab0d5) ([#9070](https://github.com/yt-dlp/yt-dlp/issues/9070)) by [Chocobozzz](https://github.com/Chocobozzz) +- **piapro**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/8e6e3651727b0b85764857fc6329fe5e0a3f00de) ([#8999](https://github.com/yt-dlp/yt-dlp/issues/8999)) by [FinnRG](https://github.com/FinnRG) +- **playsuisse**: [Add login support](https://github.com/yt-dlp/yt-dlp/commit/cae6e461073fb7c32fd32052a3e6721447c469bc) ([#9077](https://github.com/yt-dlp/yt-dlp/issues/9077)) by [chkuendig](https://github.com/chkuendig) +- **pornhub**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/de954c1b4d3a6db8a6525507e65303c7bb03f39f) ([#9227](https://github.com/yt-dlp/yt-dlp/issues/9227)) by [feederbox826](https://github.com/feederbox826) +- **pr0gramm**: [Enable POL filter and provide tags without login](https://github.com/yt-dlp/yt-dlp/commit/5f25f348f9eb5db842b1ec6799f95bebb7ba35a7) ([#9051](https://github.com/yt-dlp/yt-dlp/issues/9051)) by [Grub4K](https://github.com/Grub4K) +- **prankcastpost**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2bac6b7adb7b0e955125838e20bb39eece630ce) ([#8933](https://github.com/yt-dlp/yt-dlp/issues/8933)) by [columndeeply](https://github.com/columndeeply) +- **radiko**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/e3ce2b385ec1f03fac9d4210c57fda77134495fc) ([#9115](https://github.com/yt-dlp/yt-dlp/issues/9115)) by [YoshichikaAAA](https://github.com/YoshichikaAAA) +- **rai** + - [Filter unavailable formats](https://github.com/yt-dlp/yt-dlp/commit/f78814923748277e7067b796f25870686fb46205) ([#9189](https://github.com/yt-dlp/yt-dlp/issues/9189)) by [nixxo](https://github.com/nixxo) + - [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/8f423cf8051fbfeedd57cca00d106012e6e86a97) ([#9291](https://github.com/yt-dlp/yt-dlp/issues/9291)) by [nixxo](https://github.com/nixxo) +- **redcdnlivx, sejm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/fcaa2e735b00b15a2b0d9f55f4187c654b4b5b39) ([#8676](https://github.com/yt-dlp/yt-dlp/issues/8676)) by [selfisekai](https://github.com/selfisekai) +- **redtube** + - [Fix formats 
extraction](https://github.com/yt-dlp/yt-dlp/commit/c91d8b1899403daff6fc15206ad32de8db17fb8f) ([#9076](https://github.com/yt-dlp/yt-dlp/issues/9076)) by [jazz1611](https://github.com/jazz1611) + - [Support redtube.com.br URLs](https://github.com/yt-dlp/yt-dlp/commit/4a6ff0b47a700dee3ee5c54804c31965308479ae) ([#9103](https://github.com/yt-dlp/yt-dlp/issues/9103)) by [jazz1611](https://github.com/jazz1611) +- **ridehome**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cd7086c0d54ec1d7e02a30bd5bd934bdb2c54642) ([#8875](https://github.com/yt-dlp/yt-dlp/issues/8875)) by [SirElderling](https://github.com/SirElderling) +- **rinsefmartistplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a36dbad712d359ec1c5b73d9bbbe562c03e9660) ([#8794](https://github.com/yt-dlp/yt-dlp/issues/8794)) by [SirElderling](https://github.com/SirElderling) +- **roosterteeth** + - [Add Brightcove fallback](https://github.com/yt-dlp/yt-dlp/commit/b2cc150ad83ba20ceb2d6e73d09854eed3c2d05c) ([#9403](https://github.com/yt-dlp/yt-dlp/issues/9403)) by [bashonly](https://github.com/bashonly) + - [Extract ad-free streams](https://github.com/yt-dlp/yt-dlp/commit/dd29e6e5fdf0f3758cb0829e73749832768f1a4e) ([#9355](https://github.com/yt-dlp/yt-dlp/issues/9355)) by [jkmartindale](https://github.com/jkmartindale) + - [Extract release date and timestamp](https://github.com/yt-dlp/yt-dlp/commit/dfd8c0b69683b1c11beea039a96dd2949026c1d7) ([#9393](https://github.com/yt-dlp/yt-dlp/issues/9393)) by [bashonly](https://github.com/bashonly) + - [Support bonus features](https://github.com/yt-dlp/yt-dlp/commit/8993721ecb34867b52b79f6e92b233008d1cbe78) ([#9406](https://github.com/yt-dlp/yt-dlp/issues/9406)) by [Bl4Cc4t](https://github.com/Bl4Cc4t) +- **rule34video** + - [Extract `creators`](https://github.com/yt-dlp/yt-dlp/commit/3d9dc2f3590e10abf1561ebdaed96734a740587c) ([#9258](https://github.com/yt-dlp/yt-dlp/issues/9258)) by [gmes78](https://github.com/gmes78) + - [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/fee2d8d9c38f9b5f0a8df347c1e698983339c34d) ([#7416](https://github.com/yt-dlp/yt-dlp/issues/7416)) by [gmes78](https://github.com/gmes78) + - [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c0ecceeefe6ebd27452d9d8f20658f83ae121d04) ([#9044](https://github.com/yt-dlp/yt-dlp/issues/9044)) by [gmes78](https://github.com/gmes78) +- **rumblechannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0023af81fbce01984f35b34ecaf8562739831227) ([#9092](https://github.com/yt-dlp/yt-dlp/issues/9092)) by [Pranaxcau](https://github.com/Pranaxcau), [vista-narvas](https://github.com/vista-narvas) +- **screencastify**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0bee29493ca8f91a0055a3706c7c94f5860188df) ([#9232](https://github.com/yt-dlp/yt-dlp/issues/9232)) by [seproDev](https://github.com/seproDev) +- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ddd4b5e10a653bee78e656107710021c1b82934c) ([#8938](https://github.com/yt-dlp/yt-dlp/issues/8938)) by [diman8](https://github.com/diman8) +- **swearnet**: [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/b05640d532c43a52c0a0da096bb2dbd51e105ec0) ([#9281](https://github.com/yt-dlp/yt-dlp/issues/9281)) by [bashonly](https://github.com/bashonly) +- **tiktok**: [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d9b4154cbcb979d7e30af3a73b1bee422aae5aa3) ([#9327](https://github.com/yt-dlp/yt-dlp/issues/9327)) by [bashonly](https://github.com/bashonly) +- **trtworld**: [Add 
extractor](https://github.com/yt-dlp/yt-dlp/commit/8ab84650837e58046430c9f4b615c56a8886e071) ([#8701](https://github.com/yt-dlp/yt-dlp/issues/8701)) by [ufukk](https://github.com/ufukk) +- **tvp**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/882e3b753c79c7799ce135c3a5edb72494b576af) ([#8860](https://github.com/yt-dlp/yt-dlp/issues/8860)) by [selfisekai](https://github.com/selfisekai) +- **twitch**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/5b8c69ae04444a4c80a5a99917e40f75a116c3b8) ([#8960](https://github.com/yt-dlp/yt-dlp/issues/8960)) by [DmitryScaletta](https://github.com/DmitryScaletta) +- **twitter** + - [Extract bitrate for HLS audio formats](https://github.com/yt-dlp/yt-dlp/commit/28e53d60df9b8aadd52a93504e30e885c9c35262) ([#9257](https://github.com/yt-dlp/yt-dlp/issues/9257)) by [bashonly](https://github.com/bashonly) + - [Extract numeric `channel_id`](https://github.com/yt-dlp/yt-dlp/commit/55f1833376505ed1e4be0516b09bb3ea4425e8a4) ([#9263](https://github.com/yt-dlp/yt-dlp/issues/9263)) by [bashonly](https://github.com/bashonly) +- **txxx**: [Extract thumbnails](https://github.com/yt-dlp/yt-dlp/commit/d79c7e9937c388c68b722ab7450960e43ef776d6) ([#9063](https://github.com/yt-dlp/yt-dlp/issues/9063)) by [shmohawk](https://github.com/shmohawk) +- **utreon**: [Support playeur.com](https://github.com/yt-dlp/yt-dlp/commit/41d6b61e9852a5b97f47cc8a7718b31fb23f0aea) ([#9182](https://github.com/yt-dlp/yt-dlp/issues/9182)) by [DmitryScaletta](https://github.com/DmitryScaletta) +- **vbox7**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/67bb70cd700c8d4c3149cd9e0539a5f32c3d1ce6) ([#9100](https://github.com/yt-dlp/yt-dlp/issues/9100)) by [seproDev](https://github.com/seproDev) +- **viewlift**: [Add support for chorki.com](https://github.com/yt-dlp/yt-dlp/commit/41b6cdb4197aaf7ad82bdad6885eb5d5c64acd74) ([#9095](https://github.com/yt-dlp/yt-dlp/issues/9095)) by [NurTasin](https://github.com/NurTasin) +- **vimeo** + - [Extract `live_status` and `release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/f0426e9ca57dd14b82e6c13afc17947614f1e8eb) ([#9290](https://github.com/yt-dlp/yt-dlp/issues/9290)) by [pzhlkj6612](https://github.com/pzhlkj6612) + - [Fix API headers](https://github.com/yt-dlp/yt-dlp/commit/8e765755f7f4909e1b535e61b7376b2d66e1ba6a) ([#9125](https://github.com/yt-dlp/yt-dlp/issues/9125)) by [bashonly](https://github.com/bashonly) + - [Fix login](https://github.com/yt-dlp/yt-dlp/commit/2e8de097ad82da378e97005e8f1ff7e5aebca585) ([#9274](https://github.com/yt-dlp/yt-dlp/issues/9274)) by [bashonly](https://github.com/bashonly) +- **viously**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/95e82347b398d8bb160767cdd975edecd62cbabd) ([#8927](https://github.com/yt-dlp/yt-dlp/issues/8927)) by [nbr23](https://github.com/nbr23), [seproDev](https://github.com/seproDev) +- **youtube** + - [Better error when all player responses are skipped](https://github.com/yt-dlp/yt-dlp/commit/5eedc208ec89d6284777060c94aadd06502338b9) ([#9083](https://github.com/yt-dlp/yt-dlp/issues/9083)) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) + - [Bump Android and iOS client versions](https://github.com/yt-dlp/yt-dlp/commit/413d3675804599bc8fe419c19e36490fd8f0b30f) ([#9317](https://github.com/yt-dlp/yt-dlp/issues/9317)) by [bashonly](https://github.com/bashonly) + - [Further bump client versions](https://github.com/yt-dlp/yt-dlp/commit/7aad06541e543fa3452d3d2513e6f079aad1f99b) 
([#9395](https://github.com/yt-dlp/yt-dlp/issues/9395)) by [bashonly](https://github.com/bashonly) + - tab: [Fix `tags` extraction](https://github.com/yt-dlp/yt-dlp/commit/8828f4576bd862438d4fbf634f1d6ab18a217b0e) ([#9413](https://github.com/yt-dlp/yt-dlp/issues/9413)) by [x11x](https://github.com/x11x) +- **zenporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f00c0def7434fac3c88503c2a77c4b2419b8e5ca) ([#8509](https://github.com/yt-dlp/yt-dlp/issues/8509)) by [SirElderling](https://github.com/SirElderling) +- **zetland**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f4b57594673035a59d72f7667588da848820034) ([#9116](https://github.com/yt-dlp/yt-dlp/issues/9116)) by [HobbyistDev](https://github.com/HobbyistDev) + +#### Downloader changes +- **http**: [Reset resume length to handle `FileNotFoundError`](https://github.com/yt-dlp/yt-dlp/commit/2d91b9845621639c53dca7ee9d3d954f3624ba18) ([#8399](https://github.com/yt-dlp/yt-dlp/issues/8399)) by [boredzo](https://github.com/boredzo) + +#### Networking changes +- [Remove `_CompatHTTPError`](https://github.com/yt-dlp/yt-dlp/commit/811d298b231cfa29e75c321b23a91d1c2b17602c) ([#8871](https://github.com/yt-dlp/yt-dlp/issues/8871)) by [coletdjnz](https://github.com/coletdjnz) +- **Request Handler** + - [Remove additional logging handlers on close](https://github.com/yt-dlp/yt-dlp/commit/0085e2bab8465ee7d46d16fcade3ed5e96cc8a48) ([#9032](https://github.com/yt-dlp/yt-dlp/issues/9032)) by [coletdjnz](https://github.com/coletdjnz) + - requests: [Apply `remove_dot_segments` to absolute redirect locations](https://github.com/yt-dlp/yt-dlp/commit/35f4f764a786685ea45d84abe1cf1ad3847f4c97) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- **build** + - [Add `default` optional dependency group](https://github.com/yt-dlp/yt-dlp/commit/cf91400a1dd6cc99b11a6d163e1af73b64d618c9) ([#9295](https://github.com/yt-dlp/yt-dlp/issues/9295)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Add transitional `setup.py` and `pyinst.py`](https://github.com/yt-dlp/yt-dlp/commit/0abf2f1f153ab47990edbeee3477dc55f74c7f89) ([#9296](https://github.com/yt-dlp/yt-dlp/issues/9296)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) + - [Bump `actions/upload-artifact` to v4 and adjust workflows](https://github.com/yt-dlp/yt-dlp/commit/3876429d72afb35247f4b2531eb9b16cfc7e0968) by [bashonly](https://github.com/bashonly) + - [Bump `conda-incubator/setup-miniconda` to v3](https://github.com/yt-dlp/yt-dlp/commit/b0059f0413a6ba6ab0a3aec1f00188ce083cd8bf) by [bashonly](https://github.com/bashonly) + - [Fix `secretstorage` for ARM builds](https://github.com/yt-dlp/yt-dlp/commit/920397634d1e84e76d2cb897bd6d69ba0c6bd5ca) by [bashonly](https://github.com/bashonly) + - [Migrate to `pyproject.toml` and `hatchling`](https://github.com/yt-dlp/yt-dlp/commit/775cde82dc5b1dc64ab0539a92dd8c7ba6c0ad33) by [bashonly](https://github.com/bashonly) (With fixes in [43cfd46](https://github.com/yt-dlp/yt-dlp/commit/43cfd462c0d01eff22c1d4290aeb96eb1ea2c0e1)) + - [Move bundle scripts into `bundle` submodule](https://github.com/yt-dlp/yt-dlp/commit/a1b778428991b1779203bac243ef4e9b6baea90c) by [bashonly](https://github.com/bashonly) + - [Support failed build job re-runs](https://github.com/yt-dlp/yt-dlp/commit/eabbccc439720fba381919a88be4fe4d96464cbd) ([#9277](https://github.com/yt-dlp/yt-dlp/issues/9277)) by [bashonly](https://github.com/bashonly) + - Makefile + - 
[Add automated `CODE_FOLDERS` and `CODE_FILES`](https://github.com/yt-dlp/yt-dlp/commit/868d2f60a7cb59b410c8cbfb452cbdb072687b81) by [bashonly](https://github.com/bashonly) + - [Ensure compatibility with BSD `make`](https://github.com/yt-dlp/yt-dlp/commit/beaa1a44554d04d9fe63a743a5bb4431ca778f28) ([#9210](https://github.com/yt-dlp/yt-dlp/issues/9210)) by [bashonly](https://github.com/bashonly) (With fixes in [73fcfa3](https://github.com/yt-dlp/yt-dlp/commit/73fcfa39f59113a8728249de2c4cee3025f17dc2)) + - [Fix man pages generated by `pandoc>=3`](https://github.com/yt-dlp/yt-dlp/commit/fb44020fa98e47620b3aa1dab94b4c5b7bfb40bd) ([#7047](https://github.com/yt-dlp/yt-dlp/issues/7047)) by [t-nil](https://github.com/t-nil) +- **ci**: [Bump `actions/setup-python` to v5](https://github.com/yt-dlp/yt-dlp/commit/b14e818b37f62e3224da157b3ad768b3f0815fcd) by [bashonly](https://github.com/bashonly) +- **cleanup** + - [Build files cleanup](https://github.com/yt-dlp/yt-dlp/commit/867f637b95b342e1cb9f1dc3c6cf0ffe727187ce) by [bashonly](https://github.com/bashonly) + - [Fix infodict returned fields](https://github.com/yt-dlp/yt-dlp/commit/f4f9f6d00edcac6d4eb2b3fb78bf81326235d492) ([#8906](https://github.com/yt-dlp/yt-dlp/issues/8906)) by [seproDev](https://github.com/seproDev) + - [Fix typo in README.md](https://github.com/yt-dlp/yt-dlp/commit/292d60b1ed3b9fe5bcb2775a894cca99b0f9473e) ([#8894](https://github.com/yt-dlp/yt-dlp/issues/8894)) by [antonkesy](https://github.com/antonkesy) + - [Mark broken and remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/df773c3d5d1cc1f877cf8582f0072e386fc49318) ([#9238](https://github.com/yt-dlp/yt-dlp/issues/9238)) by [seproDev](https://github.com/seproDev) + - [Match both `http` and `https` in `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a687226b48f71b874fa18b0165ec528d591f53fb) ([#8968](https://github.com/yt-dlp/yt-dlp/issues/8968)) by [seproDev](https://github.com/seproDev) + - [Remove unused code](https://github.com/yt-dlp/yt-dlp/commit/ed3bb2b0a12c44334e0d09481752dabf2ca1dc13) ([#8968](https://github.com/yt-dlp/yt-dlp/issues/8968)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) + - Miscellaneous + - [93240fc](https://github.com/yt-dlp/yt-dlp/commit/93240fc1848de4a94f25844c96e0dcd282ef1d3b) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) + - [615a844](https://github.com/yt-dlp/yt-dlp/commit/615a84447e8322720be77a0e64298d7f42848693) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **devscripts** + - `install_deps`: [Add script and migrate to it](https://github.com/yt-dlp/yt-dlp/commit/b8a433aaca86b15cb9f1a451b0f69371d2fc22a9) by [bashonly](https://github.com/bashonly) + - `tomlparse`: [Add makeshift toml parser](https://github.com/yt-dlp/yt-dlp/commit/fd647775e27e030ab17387c249e2ebeba68f8ff0) by [Grub4K](https://github.com/Grub4K) +- **docs**: [Misc Cleanup](https://github.com/yt-dlp/yt-dlp/commit/47ab66db0f083a76c7fba0f6e136b21dd5a93e3b) ([#8977](https://github.com/yt-dlp/yt-dlp/issues/8977)) by [Arthurszzz](https://github.com/Arthurszzz), [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **test** + - [Skip source address tests if the address cannot be bound
to](https://github.com/yt-dlp/yt-dlp/commit/69d31914952dd33082ac7019c6f76b43c45b9d06) ([#8900](https://github.com/yt-dlp/yt-dlp/issues/8900)) by [coletdjnz](https://github.com/coletdjnz) + - websockets: [Fix timeout test on Windows](https://github.com/yt-dlp/yt-dlp/commit/ac340d0745a9de5d494033e3507ef624ba25add3) ([#9344](https://github.com/yt-dlp/yt-dlp/issues/9344)) by [seproDev](https://github.com/seproDev) + +### 2023.12.30 + +#### Core changes +- [Fix format selection parse error for CPython 3.12](https://github.com/yt-dlp/yt-dlp/commit/00cdda4f6fe18712ced13dbc64b7ea10f323e268) ([#8797](https://github.com/yt-dlp/yt-dlp/issues/8797)) by [Grub4K](https://github.com/Grub4K) +- [Let `read_stdin` obey `--quiet`](https://github.com/yt-dlp/yt-dlp/commit/a174c453ee1e853c584ceadeac17eef2bd433dc5) by [pukkandan](https://github.com/pukkandan) +- [Merged with youtube-dl be008e6](https://github.com/yt-dlp/yt-dlp/commit/65de7d204ce88c0225df1321060304baab85dbd8) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [Grub4K](https://github.com/Grub4K) +- [Parse `release_year` from `release_date`](https://github.com/yt-dlp/yt-dlp/commit/1732eccc0a40256e076bf0435a29f0f1d8419280) ([#8524](https://github.com/yt-dlp/yt-dlp/issues/8524)) by [seproDev](https://github.com/seproDev) +- [Release workflow and Updater cleanup](https://github.com/yt-dlp/yt-dlp/commit/632b8ee54eb2df8ac6e20746a0bd95b7ebb053aa) ([#8640](https://github.com/yt-dlp/yt-dlp/issues/8640)) by [bashonly](https://github.com/bashonly) +- [Remove Python 3.7 support](https://github.com/yt-dlp/yt-dlp/commit/f4b95acafcd69a50040730dfdf732e797278fdcc) ([#8361](https://github.com/yt-dlp/yt-dlp/issues/8361)) by [bashonly](https://github.com/bashonly) +- [Support `NO_COLOR` environment variable](https://github.com/yt-dlp/yt-dlp/commit/a0b19d319a6ce8b7059318fa17a34b144fde1785) ([#8385](https://github.com/yt-dlp/yt-dlp/issues/8385)) by [Grub4K](https://github.com/Grub4K), [prettykool](https://github.com/prettykool) +- **outtmpl**: [Support multiplication](https://github.com/yt-dlp/yt-dlp/commit/993edd3f6e17e966c763bc86dc34125445cec6b6) by [pukkandan](https://github.com/pukkandan) +- **utils**: `traverse_obj`: [Move `is_user_input` into output template](https://github.com/yt-dlp/yt-dlp/commit/0b6f829b1dfda15d3c1d7d1fbe4ea6102c26dd24) ([#8673](https://github.com/yt-dlp/yt-dlp/issues/8673)) by [Grub4K](https://github.com/Grub4K) +- **webvtt**: [Allow spaces before newlines for CueBlock](https://github.com/yt-dlp/yt-dlp/commit/15f22b4880b6b3f71f350c64d70976ae65b9f1ca) ([#7681](https://github.com/yt-dlp/yt-dlp/issues/7681)) by [TSRBerry](https://github.com/TSRBerry) (With fixes in [298230e](https://github.com/yt-dlp/yt-dlp/commit/298230e550886b746c266724dd701d842ca2696e) by [pukkandan](https://github.com/pukkandan)) + +#### Extractor changes +- [Add `media_type` field](https://github.com/yt-dlp/yt-dlp/commit/e370f9ec36972d06100a3db893b397bfc1b07b4d) by [trainman261](https://github.com/trainman261) +- [Extract from `media` elements in SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/ddb2d7588bea48bae965dbfabe6df6550c9d3d43) ([#8504](https://github.com/yt-dlp/yt-dlp/issues/8504)) by [seproDev](https://github.com/seproDev) +- **abematv**: [Fix season metadata](https://github.com/yt-dlp/yt-dlp/commit/cc07f5cc85d9e2a6cd0bedb9d961665eea0d6047) ([#8607](https://github.com/yt-dlp/yt-dlp/issues/8607)) by [middlingphys](https://github.com/middlingphys) +- **allstar**: [Add 
extractors](https://github.com/yt-dlp/yt-dlp/commit/3237f8ba29fe13bf95ff42b1e48b5b5109715feb) ([#8274](https://github.com/yt-dlp/yt-dlp/issues/8274)) by [S-Aarab](https://github.com/S-Aarab) +- **altcensored**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3f90813f0617e0d21302398010de7496c9ae36aa) ([#8291](https://github.com/yt-dlp/yt-dlp/issues/8291)) by [drzraf](https://github.com/drzraf) +- **ard**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/5f009a094f0e8450792b097c4c8273622778052d) ([#8878](https://github.com/yt-dlp/yt-dlp/issues/8878)) by [seproDev](https://github.com/seproDev) +- **ardbetamediathek**: [Fix series extraction](https://github.com/yt-dlp/yt-dlp/commit/1f8bd8eba82ba10ddb49ee7cc0be4540dab103d5) ([#8687](https://github.com/yt-dlp/yt-dlp/issues/8687)) by [lstrojny](https://github.com/lstrojny) +- **bbc** + - [Extract more formats](https://github.com/yt-dlp/yt-dlp/commit/c919b68f7e79ea5010f75f648d3c9e45405a8011) ([#8321](https://github.com/yt-dlp/yt-dlp/issues/8321)) by [barsnick](https://github.com/barsnick), [dirkf](https://github.com/dirkf) + - [Fix JSON parsing bug](https://github.com/yt-dlp/yt-dlp/commit/19741ab8a401ec64d5e84fdbfcfb141d105e7bc8) by [bashonly](https://github.com/bashonly) +- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4903f452b68efb62dadf22e81be8c7934fc743e7) ([#8651](https://github.com/yt-dlp/yt-dlp/issues/8651)) by [bashonly](https://github.com/bashonly) +- **bilibili**: [Support courses and interactive videos](https://github.com/yt-dlp/yt-dlp/commit/9f09bdcfcb8e2b4b2decdc30d35d34b993bc7a94) ([#8343](https://github.com/yt-dlp/yt-dlp/issues/8343)) by [c-basalt](https://github.com/c-basalt) +- **bitchute**: [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/b1a1ec1540605d2ea7abdb63336ffb1c56bf6316) ([#8507](https://github.com/yt-dlp/yt-dlp/issues/8507)) by [SirElderling](https://github.com/SirElderling) +- **box**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/5a230233d6fce06f4abd1fce0dc92b948e6f780b) ([#8649](https://github.com/yt-dlp/yt-dlp/issues/8649)) by [bashonly](https://github.com/bashonly) +- **bundestag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00a3e47bf5440c96025a76e08337ff2a475ed83e) ([#8783](https://github.com/yt-dlp/yt-dlp/issues/8783)) by [Grub4K](https://github.com/Grub4K) +- **drtv**: [Set default ext for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f96ab86cd837b1b5823baa87d144e15322ee9298) ([#8590](https://github.com/yt-dlp/yt-dlp/issues/8590)) by [seproDev](https://github.com/seproDev) +- **duoplay**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/66a0127d45033c698bdbedf162cddc55d9e7b906) ([#8542](https://github.com/yt-dlp/yt-dlp/issues/8542)) by [glensc](https://github.com/glensc) +- **eplus**: [Add login support and DRM detection](https://github.com/yt-dlp/yt-dlp/commit/d5d1517e7d838500800d193ac3234b06e89654cd) ([#8661](https://github.com/yt-dlp/yt-dlp/issues/8661)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **facebook** + - [Fix Memories extraction](https://github.com/yt-dlp/yt-dlp/commit/c39358a54bc6675ae0c50b81024e5a086e41656a) ([#8681](https://github.com/yt-dlp/yt-dlp/issues/8681)) by [kclauhk](https://github.com/kclauhk) + - [Improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/9cafb9ff17e14475a35c9a58b5bb010c86c9db4b) ([#8296](https://github.com/yt-dlp/yt-dlp/issues/8296)) by [kclauhk](https://github.com/kclauhk) +- **floatplane**: [Add 
extractors](https://github.com/yt-dlp/yt-dlp/commit/628fa244bbce2ad39775a5959e99588f30cac152) ([#8639](https://github.com/yt-dlp/yt-dlp/issues/8639)) by [seproDev](https://github.com/seproDev) +- **francetv**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/71f28097fec1c9e029f74b68a4eadc8915399840) ([#8409](https://github.com/yt-dlp/yt-dlp/issues/8409)) by [Fymyte](https://github.com/Fymyte) +- **instagram**: [Fix stories extraction](https://github.com/yt-dlp/yt-dlp/commit/50eaea9fd7787546b53660e736325fa31c77765d) ([#8843](https://github.com/yt-dlp/yt-dlp/issues/8843)) by [bashonly](https://github.com/bashonly) +- **joqrag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/db8b4edc7d0bd27da462f6fe82ff6e13e3d68a04) ([#8384](https://github.com/yt-dlp/yt-dlp/issues/8384)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **litv**: [Fix premium content extraction](https://github.com/yt-dlp/yt-dlp/commit/f45c4efcd928a173e1300a8f1ce4258e70c969b1) ([#8842](https://github.com/yt-dlp/yt-dlp/issues/8842)) by [bashonly](https://github.com/bashonly) +- **maariv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c5f01bf7d4b9426c87c3f8248de23934a56579e0) ([#8331](https://github.com/yt-dlp/yt-dlp/issues/8331)) by [amir16yp](https://github.com/amir16yp) +- **mediastream**: [Fix authenticated format extraction](https://github.com/yt-dlp/yt-dlp/commit/b03c89309eb141be1a1eceeeb7475dd3b7529ad9) ([#8657](https://github.com/yt-dlp/yt-dlp/issues/8657)) by [NickCis](https://github.com/NickCis) +- **nebula**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/45d82be65f71bb05506bd55376c6fdb36bc54142) ([#8566](https://github.com/yt-dlp/yt-dlp/issues/8566)) by [elyse0](https://github.com/elyse0), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **nintendo**: [Fix Nintendo Direct extraction](https://github.com/yt-dlp/yt-dlp/commit/1d24da6c899ef280d8b0a48a5e280ecd5d39cdf4) ([#8609](https://github.com/yt-dlp/yt-dlp/issues/8609)) by [Grub4K](https://github.com/Grub4K) +- **ondemandkorea**: [Fix upgraded format extraction](https://github.com/yt-dlp/yt-dlp/commit/04a5e06350e3ef7c03f94f2f3f90dd96c6411152) ([#8677](https://github.com/yt-dlp/yt-dlp/issues/8677)) by [seproDev](https://github.com/seproDev) +- **pr0gramm**: [Support variant formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/f98a3305eb124a0c375d03209d5c5a64fe1766c8) ([#8674](https://github.com/yt-dlp/yt-dlp/issues/8674)) by [Grub4K](https://github.com/Grub4K) +- **rinsefm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c91af948e43570025e4aa887e248fd025abae394) ([#8778](https://github.com/yt-dlp/yt-dlp/issues/8778)) by [hashFactory](https://github.com/hashFactory) +- **rudovideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0d531c35eca4c2eb36e160530a7a333edbc727cc) ([#8664](https://github.com/yt-dlp/yt-dlp/issues/8664)) by [nicodato](https://github.com/nicodato) +- **theguardian**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1fa3f24d4b5d22176b11d78420f1f4b64a5af0a8) ([#8535](https://github.com/yt-dlp/yt-dlp/issues/8535)) by [SirElderling](https://github.com/SirElderling) +- **theplatform**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/7e09c147fdccb44806bbf601573adc4b77210a89) ([#8635](https://github.com/yt-dlp/yt-dlp/issues/8635)) by [trainman261](https://github.com/trainman261) +- **twitcasting**: [Detect livestreams via API and `show` 
page](https://github.com/yt-dlp/yt-dlp/commit/585d0ed9abcfcb957f2b2684b8ad43c3af160383) ([#8601](https://github.com/yt-dlp/yt-dlp/issues/8601)) by [bashonly](https://github.com/bashonly), [JC-Chung](https://github.com/JC-Chung) +- **twitcastinguser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ff2fde1b8f922fd34bae6172602008cd67c07c93) ([#8650](https://github.com/yt-dlp/yt-dlp/issues/8650)) by [bashonly](https://github.com/bashonly) +- **twitter** + - [Extract stale tweets](https://github.com/yt-dlp/yt-dlp/commit/1c54a98e19d047e7c15184237b6ef8ad50af489c) ([#8724](https://github.com/yt-dlp/yt-dlp/issues/8724)) by [bashonly](https://github.com/bashonly) + - [Prioritize m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/e7d22348e77367740da78a3db27167ecf894b7c9) ([#8826](https://github.com/yt-dlp/yt-dlp/issues/8826)) by [bashonly](https://github.com/bashonly) + - [Work around API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/116c268438ea4d3738f6fa502c169081ca8f0ee7) ([#8825](https://github.com/yt-dlp/yt-dlp/issues/8825)) by [bashonly](https://github.com/bashonly) + - broadcast: [Extract `concurrent_view_count`](https://github.com/yt-dlp/yt-dlp/commit/6fe82491ed622b948c512cf4aab46ac3a234ae0a) ([#8600](https://github.com/yt-dlp/yt-dlp/issues/8600)) by [sonmezberkay](https://github.com/sonmezberkay) +- **vidly**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/34df1c1f60fa652c0a6a5c712b06c10e45daf6b7) ([#8612](https://github.com/yt-dlp/yt-dlp/issues/8612)) by [seproDev](https://github.com/seproDev) +- **vocaroo**: [Do not use deprecated `getheader`](https://github.com/yt-dlp/yt-dlp/commit/f223b1b0789f65e06619dcc9fc9e74f50d259379) ([#8606](https://github.com/yt-dlp/yt-dlp/issues/8606)) by [qbnu](https://github.com/qbnu) +- **vvvvid**: [Set user-agent to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1725e943b0e8a8b585305660d4611e684374409c) ([#8615](https://github.com/yt-dlp/yt-dlp/issues/8615)) by [Kyraminol](https://github.com/Kyraminol) +- **youtube** + - [Fix `like_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/6b5d93b0b0240e287389d1d43b2d5293e18aa4cc) ([#8763](https://github.com/yt-dlp/yt-dlp/issues/8763)) by [Ganesh910](https://github.com/Ganesh910) + - [Improve detection of faulty HLS formats](https://github.com/yt-dlp/yt-dlp/commit/bb5a54e6db2422bbd155d93a0e105b6616c09467) ([#8646](https://github.com/yt-dlp/yt-dlp/issues/8646)) by [bashonly](https://github.com/bashonly) + - [Return empty playlist when channel/tab has no videos](https://github.com/yt-dlp/yt-dlp/commit/044886c220620a7679109e92352890e18b6079e3) by [pukkandan](https://github.com/pukkandan) + - [Support cf.piped.video](https://github.com/yt-dlp/yt-dlp/commit/6a9c7a2b52655bacfa7ab2da24fd0d14a6fff495) ([#8514](https://github.com/yt-dlp/yt-dlp/issues/8514)) by [OIRNOIR](https://github.com/OIRNOIR) +- **zingmp3**: [Add support for radio and podcasts](https://github.com/yt-dlp/yt-dlp/commit/64de1a4c25bada90374b88d7353754fe8fbfcc51) ([#7189](https://github.com/yt-dlp/yt-dlp/issues/7189)) by [hatienl0i261299](https://github.com/hatienl0i261299) + +#### Postprocessor changes +- **ffmpegmetadata**: [Embed stream metadata in single format downloads](https://github.com/yt-dlp/yt-dlp/commit/deeb13eae82e60f82a2c0c5861f460399a997528) ([#8647](https://github.com/yt-dlp/yt-dlp/issues/8647)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- [Strip whitespace around header values](https://github.com/yt-dlp/yt-dlp/commit/196eb0fe77b78e2e5ca02c506c3837c2b1a7964c) 
([#8802](https://github.com/yt-dlp/yt-dlp/issues/8802)) by [coletdjnz](https://github.com/coletdjnz) +- **Request Handler**: websockets: [Migrate websockets to networking framework](https://github.com/yt-dlp/yt-dlp/commit/ccfd70f4c24b579c72123ca76ab50164f8f122b7) ([#7720](https://github.com/yt-dlp/yt-dlp/issues/7720)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- **ci** + - [Concurrency optimizations](https://github.com/yt-dlp/yt-dlp/commit/f124fa458826308afc86cf364c509f857686ecfd) ([#8614](https://github.com/yt-dlp/yt-dlp/issues/8614)) by [Grub4K](https://github.com/Grub4K) + - [Run core tests only for core changes](https://github.com/yt-dlp/yt-dlp/commit/13b3cb3c2b7169a1e17d6fc62593bf744170521c) ([#8841](https://github.com/yt-dlp/yt-dlp/issues/8841)) by [Grub4K](https://github.com/Grub4K) +- **cleanup** + - [Fix spelling of `IE_NAME`](https://github.com/yt-dlp/yt-dlp/commit/bc4ab17b38f01000d99c5c2bedec89721fee65ec) ([#8810](https://github.com/yt-dlp/yt-dlp/issues/8810)) by [barsnick](https://github.com/barsnick) + - [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/9751a457cfdb18bf99d9ee0d10e4e6a594502bbf) ([#8604](https://github.com/yt-dlp/yt-dlp/issues/8604)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [f9fb3ce](https://github.com/yt-dlp/yt-dlp/commit/f9fb3ce86e3c6a0c3c33b45392b8d7288bceba76) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **devscripts**: `run_tests`: [Create Python script](https://github.com/yt-dlp/yt-dlp/commit/2d1d683a541d71f3d3bb999dfe8eeb1976fb91ce) ([#8720](https://github.com/yt-dlp/yt-dlp/issues/8720)) by [Grub4K](https://github.com/Grub4K) (With fixes in [225cf2b](https://github.com/yt-dlp/yt-dlp/commit/225cf2b830a1de2c5eacd257edd2a01aed1e1114)) +- **docs**: [Update youtube-dl merge commit in `README.md`](https://github.com/yt-dlp/yt-dlp/commit/f10589e3453009bb523f55849bba144c9b91cf2a) by [bashonly](https://github.com/bashonly) +- **test**: networking: [Update tests for OpenSSL 3.2](https://github.com/yt-dlp/yt-dlp/commit/37755a037e612bfc608c3d4722e8ef2ce6a022ee) ([#8814](https://github.com/yt-dlp/yt-dlp/issues/8814)) by [bashonly](https://github.com/bashonly) + +### 2023.11.16 + +#### Extractor changes +- **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15cb3528cbda7b6198f49a6b5953c226d701696b) ([#8586](https://github.com/yt-dlp/yt-dlp/issues/8586)) by [bashonly](https://github.com/bashonly) +- **beatbump**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/21dc069bea2d4d99345dd969e098f4535c751d45) ([#8576](https://github.com/yt-dlp/yt-dlp/issues/8576)) by [seproDev](https://github.com/seproDev) +- **dailymotion**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a489f071508ec5caf5f32052d142afe86c28df7a) ([#7692](https://github.com/yt-dlp/yt-dlp/issues/7692)) by [TravisDupes](https://github.com/TravisDupes) +- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0783fd558ed0d3a8bc754beb75a406256f8b97b2) ([#8484](https://github.com/yt-dlp/yt-dlp/issues/8484)) by [almx](https://github.com/almx), [seproDev](https://github.com/seproDev) +- **eltrecetv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dcfad52812aa8ce007cefbfbe63f58b49f6b1046) ([#8216](https://github.com/yt-dlp/yt-dlp/issues/8216)) by [elivinsky](https://github.com/elivinsky) +- **jiosaavn**: [Add 
extractors](https://github.com/yt-dlp/yt-dlp/commit/b530118e7f48232cacf8050d79a6b20bdfcf5468) ([#8307](https://github.com/yt-dlp/yt-dlp/issues/8307)) by [awalgarg](https://github.com/awalgarg) +- **njpwworld**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/e569c2d1f4b665795a2b64f0aaf7f76930664233) ([#8570](https://github.com/yt-dlp/yt-dlp/issues/8570)) by [aarubui](https://github.com/aarubui) +- **tv5mondeplus**: [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/0f634dba3afdc429ece8839b02f6d56c27b7973a) ([#4209](https://github.com/yt-dlp/yt-dlp/issues/4209)) by [FrankZ85](https://github.com/FrankZ85) +- **twitcasting**: [Fix livestream detection](https://github.com/yt-dlp/yt-dlp/commit/2325d03aa7bb80f56ba52cd6992258e44727b424) ([#8574](https://github.com/yt-dlp/yt-dlp/issues/8574)) by [JC-Chung](https://github.com/JC-Chung) +- **zenyandex**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5efe68b73cbf6e907c2e6a3aa338664385084184) ([#8454](https://github.com/yt-dlp/yt-dlp/issues/8454)) by [starius](https://github.com/starius) + +#### Misc. changes +- **build**: [Make `secretstorage` an optional dependency](https://github.com/yt-dlp/yt-dlp/commit/24f827875c6ba513f12ed09a3aef2bbed223760d) ([#8585](https://github.com/yt-dlp/yt-dlp/issues/8585)) by [bashonly](https://github.com/bashonly) + +### 2023.11.14 + +#### Important changes +- **The release channels have been adjusted!** + * [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel. + * [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes. +- Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x) + - Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers + +#### Core changes +- [Add `--compat-option manifest-filesize-approx`](https://github.com/yt-dlp/yt-dlp/commit/10025b715ea01489557eb2c5a3cc04d361fcdb52) ([#8356](https://github.com/yt-dlp/yt-dlp/issues/8356)) by [bashonly](https://github.com/bashonly) +- [Fix format sorting with `--load-info-json`](https://github.com/yt-dlp/yt-dlp/commit/595ea4a99b726b8fe9463e7853b7053978d0544e) ([#8521](https://github.com/yt-dlp/yt-dlp/issues/8521)) by [bashonly](https://github.com/bashonly) +- [Include build origin in verbose output](https://github.com/yt-dlp/yt-dlp/commit/20314dd46f25e0e0a7e985a7804049aefa8b909f) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- [Only ensure playlist thumbnail dir if writing thumbs](https://github.com/yt-dlp/yt-dlp/commit/a40e0b37dfc8c26916b0e01aa3f29f3bc42250b6) ([#8373](https://github.com/yt-dlp/yt-dlp/issues/8373)) by [bashonly](https://github.com/bashonly) +- **update**: [Overhaul self-updater](https://github.com/yt-dlp/yt-dlp/commit/0b6ad22e6a432006a75df968f0283e6c6b3cfae6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- [Do not smuggle `http_headers`](https://github.com/yt-dlp/yt-dlp/commit/f04b5bedad7b281bee9814686bba1762bae092eb) by [coletdjnz](https://github.com/coletdjnz) +- [Do not test truth value of `xml.etree.ElementTree.Element`](https://github.com/yt-dlp/yt-dlp/commit/d4f14a72dc1dd79396e0e80980268aee902b61e4) 
([#8582](https://github.com/yt-dlp/yt-dlp/issues/8582)) by [bashonly](https://github.com/bashonly) +- **brilliantpala**: [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/9b5bedf13a3323074daceb0ec6ebb3cc6e0b9684) ([#8352](https://github.com/yt-dlp/yt-dlp/issues/8352)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **generic**: [Improve direct video link ext detection](https://github.com/yt-dlp/yt-dlp/commit/4ce2f29a50fcfb9920e6f2ffe42192945a2bad7e) ([#8340](https://github.com/yt-dlp/yt-dlp/issues/8340)) by [bashonly](https://github.com/bashonly) +- **laxarxames**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/312a2d1e8bc247264f9d85c5ec764e33aa0133b5) ([#8412](https://github.com/yt-dlp/yt-dlp/issues/8412)) by [aniolpages](https://github.com/aniolpages) +- **n-tv.de**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8afd9468b0c822843bc480d366d1c86698daabfb) ([#8414](https://github.com/yt-dlp/yt-dlp/issues/8414)) by [1100101](https://github.com/1100101) +- **neteasemusic**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/46acc418a53470b7f32581b3309c3cb87aa8488d) ([#8531](https://github.com/yt-dlp/yt-dlp/issues/8531)) by [LoserFox](https://github.com/LoserFox) +- **nhk**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/54579be4364e148277c32e20a5c3efc2c3f52f5b) ([#8388](https://github.com/yt-dlp/yt-dlp/issues/8388)) by [garret1317](https://github.com/garret1317) +- **novaembed**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3ff494f6f41c27549420fa88be27555bd449ffdc) ([#8368](https://github.com/yt-dlp/yt-dlp/issues/8368)) by [peci1](https://github.com/peci1) +- **npo**: [Send `POST` request to streams API endpoint](https://github.com/yt-dlp/yt-dlp/commit/8e02a4dcc800f9444e9d461edc41edd7b662f435) ([#8413](https://github.com/yt-dlp/yt-dlp/issues/8413)) by [bartbroere](https://github.com/bartbroere) +- **ondemandkorea**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/05adfd883a4f2ecae0267e670a62a2e45c351aeb) ([#8386](https://github.com/yt-dlp/yt-dlp/issues/8386)) by [seproDev](https://github.com/seproDev) +- **orf**: podcast: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6ba3085616652cbf05d1858efc321fdbfc4c6119) ([#8486](https://github.com/yt-dlp/yt-dlp/issues/8486)) by [Esokrates](https://github.com/Esokrates) +- **polskieradio**: audition: [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/464327acdb353ceb91d2115163a5a9621b22fe0d) ([#8459](https://github.com/yt-dlp/yt-dlp/issues/8459)) by [shubhexists](https://github.com/shubhexists) +- **qdance**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/177f0d963e4b9db749805c482e6f288354c8be84) ([#8426](https://github.com/yt-dlp/yt-dlp/issues/8426)) by [bashonly](https://github.com/bashonly) +- **radiocomercial**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/ef12dbdcd3e7264bd3d744c1e3107597bd23ad35) ([#8508](https://github.com/yt-dlp/yt-dlp/issues/8508)) by [SirElderling](https://github.com/SirElderling) +- **sbs.co.kr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/25a4bd345a0dcfece6fef752d4537eb403da94d9) ([#8326](https://github.com/yt-dlp/yt-dlp/issues/8326)) by [seproDev](https://github.com/seproDev) +- **theatercomplextown**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/2863fcf2b6876d0c7965ff7d6d9242eea653dc6b) ([#8560](https://github.com/yt-dlp/yt-dlp/issues/8560)) by [bashonly](https://github.com/bashonly) +- **thisav**: 
[Remove](https://github.com/yt-dlp/yt-dlp/commit/cb480e390d85fb3a598c1b6d5eef3438ce729fc9) ([#8346](https://github.com/yt-dlp/yt-dlp/issues/8346)) by [bashonly](https://github.com/bashonly) +- **thisoldhouse**: [Add login support](https://github.com/yt-dlp/yt-dlp/commit/c76c96677ff6a056f5844a568ef05ee22c46d6f4) ([#8561](https://github.com/yt-dlp/yt-dlp/issues/8561)) by [bashonly](https://github.com/bashonly) +- **twitcasting**: [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/7b8b1cf5eb8bf44ce70bc24e1f56f0dba2737e98) ([#8427](https://github.com/yt-dlp/yt-dlp/issues/8427)) by [JC-Chung](https://github.com/JC-Chung), [saintliao](https://github.com/saintliao) +- **twitter** + - broadcast + - [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7d337ca977d73a0a6c07ab481ed8faa8f6ff8726) ([#8383](https://github.com/yt-dlp/yt-dlp/issues/8383)) by [HitomaruKonpaku](https://github.com/HitomaruKonpaku) + - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/f6e97090d2ed9e05441ab0f4bec3559b816d7a00) ([#8475](https://github.com/yt-dlp/yt-dlp/issues/8475)) by [bashonly](https://github.com/bashonly) +- **weibo**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15b252dfd2c6807fe57afc5a95e59abadb32ccd2) ([#8463](https://github.com/yt-dlp/yt-dlp/issues/8463)) by [c-basalt](https://github.com/c-basalt) +- **weverse**: [Fix login error handling](https://github.com/yt-dlp/yt-dlp/commit/4a601c9eff9fb42e24a4c8da3fa03628e035b35b) ([#8458](https://github.com/yt-dlp/yt-dlp/issues/8458)) by [seproDev](https://github.com/seproDev) +- **youtube**: [Check newly uploaded iOS HLS formats](https://github.com/yt-dlp/yt-dlp/commit/ef79d20dc9d27ac002a7196f073b37f2f2721aed) ([#8336](https://github.com/yt-dlp/yt-dlp/issues/8336)) by [bashonly](https://github.com/bashonly) +- **zoom**: [Extract combined view formats](https://github.com/yt-dlp/yt-dlp/commit/3906de07551fedb00b789345bf24cc27d6ddf128) ([#7847](https://github.com/yt-dlp/yt-dlp/issues/7847)) by [Mipsters](https://github.com/Mipsters) + +#### Downloader changes +- **aria2c**: [Remove duplicate `--file-allocation=none`](https://github.com/yt-dlp/yt-dlp/commit/21b25281c51523620706b11bfc1c4a889858e1f2) ([#8332](https://github.com/yt-dlp/yt-dlp/issues/8332)) by [CrendKing](https://github.com/CrendKing) +- **dash**: [Force native downloader for `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/2622c804d1a5accc3045db398e0fc52074f4bdb3) ([#8339](https://github.com/yt-dlp/yt-dlp/issues/8339)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- **Request Handler**: requests: [Add handler for `requests` HTTP library (#3668)](https://github.com/yt-dlp/yt-dlp/commit/8a8b54523addf46dfd50ef599761a81bc22362e6) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K) (With fixes in [4e38e2a](https://github.com/yt-dlp/yt-dlp/commit/4e38e2ae9d7380015349e6aee59c78bb3938befd)) + + Adds support for HTTPS proxies and persistent connections (keep-alive) + +#### Misc. 
changes +- **build** + - [Include secretstorage in Linux builds](https://github.com/yt-dlp/yt-dlp/commit/9970d74c8383432c6c8779aa47d3253dcf412b14) by [bashonly](https://github.com/bashonly) + - [Overhaul and unify release workflow](https://github.com/yt-dlp/yt-dlp/commit/1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- **ci** + - [Bump `actions/checkout` to v4](https://github.com/yt-dlp/yt-dlp/commit/5438593a35b7b042fc48fe29cad0b9039f07c9bb) by [bashonly](https://github.com/bashonly) + - [Run core tests with dependencies](https://github.com/yt-dlp/yt-dlp/commit/700444c23ddb65f618c2abd942acdc0c58c650b1) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) +- **cleanup** + - [Fix changelog typo](https://github.com/yt-dlp/yt-dlp/commit/a9d3f4b20a3533d2a40104c85bc2cc6c2564c800) by [bashonly](https://github.com/bashonly) + - [Update documentation for master and nightly channels](https://github.com/yt-dlp/yt-dlp/commit/a00af29853b8c7350ce086f4cab8c2c9cf2fcf1d) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - Miscellaneous: [b012271](https://github.com/yt-dlp/yt-dlp/commit/b012271d01b59759e4eefeab0308698cd9e7224c) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **test**: update: [Implement simple updater unit tests](https://github.com/yt-dlp/yt-dlp/commit/87264d4fdadcddd91289b968dd0e4bf58d449267) by [bashonly](https://github.com/bashonly) + +### 2023.10.13 + +#### Core changes +- [Ensure thumbnail output directory exists](https://github.com/yt-dlp/yt-dlp/commit/2acd1d555ef89851c73773776715d3de9a0e30b9) ([#7985](https://github.com/yt-dlp/yt-dlp/issues/7985)) by [Riteo](https://github.com/Riteo) +- **utils** + - `js_to_json`: [Fix `Date` constructor parsing](https://github.com/yt-dlp/yt-dlp/commit/9d7ded6419089c1bf252496073f73ad90ed71004) ([#8295](https://github.com/yt-dlp/yt-dlp/issues/8295)) by [awalgarg](https://github.com/awalgarg), [Grub4K](https://github.com/Grub4K) + - `write_xattr`: [Use `os.setxattr` if available](https://github.com/yt-dlp/yt-dlp/commit/84e26038d4002e763ea51ca1bdce4f7e63c540bf) ([#8205](https://github.com/yt-dlp/yt-dlp/issues/8205)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- **artetv**: [Support age-restricted content](https://github.com/yt-dlp/yt-dlp/commit/09f815ad52843219a7ee3f2a0dddf6c250c91f0c) ([#8301](https://github.com/yt-dlp/yt-dlp/issues/8301)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- **jtbc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b286ec68f1f28798b3e371f888a2ed97d399cf77) ([#8314](https://github.com/yt-dlp/yt-dlp/issues/8314)) by [seproDev](https://github.com/seproDev) +- **mbn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e030b6b6fba7b2f4614ad2ab9f7649d40a2dd305) ([#8312](https://github.com/yt-dlp/yt-dlp/issues/8312)) by [seproDev](https://github.com/seproDev) +- **nhk**: [Fix Japanese-language VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/4de94b9e165bfd6421a692f5f2eabcdb08edcb71) ([#8309](https://github.com/yt-dlp/yt-dlp/issues/8309)) by [garret1317](https://github.com/garret1317) +- **radiko**: [Fix bug with 
`downloader_options`](https://github.com/yt-dlp/yt-dlp/commit/b9316642313bbc9e209ac0d2276d37ba60bceb49) by [bashonly](https://github.com/bashonly) +- **tenplay**: [Add support for seasons](https://github.com/yt-dlp/yt-dlp/commit/88a99c87b680ae59002534a517e191f46c42cbd4) ([#7939](https://github.com/yt-dlp/yt-dlp/issues/7939)) by [midnightveil](https://github.com/midnightveil) +- **youku**: [Improve tudou.com support](https://github.com/yt-dlp/yt-dlp/commit/b7098d46b552a9322c6cea39ba80be5229f922de) ([#8160](https://github.com/yt-dlp/yt-dlp/issues/8160)) by [naginatana](https://github.com/naginatana) +- **youtube**: [Fix bug with `--extractor-retries inf`](https://github.com/yt-dlp/yt-dlp/commit/feebf6d02fc9651331eee2af5e08e6112288163b) ([#8328](https://github.com/yt-dlp/yt-dlp/issues/8328)) by [Grub4K](https://github.com/Grub4K) + +#### Downloader changes +- **fragment**: [Improve progress calculation](https://github.com/yt-dlp/yt-dlp/commit/1c51c520f7b511ebd9e4eb7322285a8c31eedbbd) ([#8241](https://github.com/yt-dlp/yt-dlp/issues/8241)) by [Grub4K](https://github.com/Grub4K) + +#### Misc. changes +- **cleanup**: Miscellaneous: [b634ba7](https://github.com/yt-dlp/yt-dlp/commit/b634ba742d8f38ce9ecfa0546485728b0c6c59d1) by [bashonly](https://github.com/bashonly), [gamer191](https://github.com/gamer191) + +### 2023.10.07 + +#### Extractor changes +- **abc.net.au**: iview: [Improve `episode` extraction](https://github.com/yt-dlp/yt-dlp/commit/a9efb4b8d74f3583450ffda0ee57259a47d39c70) ([#8201](https://github.com/yt-dlp/yt-dlp/issues/8201)) by [xofe](https://github.com/xofe) +- **erocast**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/47c598783c98c179e04dd12c2a3fee0f3dc53087) ([#8264](https://github.com/yt-dlp/yt-dlp/issues/8264)) by [madewokherd](https://github.com/madewokherd) +- **gofile**: [Fix token cookie bug](https://github.com/yt-dlp/yt-dlp/commit/0730d5a966fa8a937d84bfb7f68be5198acb039b) by [bashonly](https://github.com/bashonly) +- **iq.com**: [Fix extraction and subtitles](https://github.com/yt-dlp/yt-dlp/commit/35d9cbaf9638ccc9daf8a863063b2e7c135bc664) ([#8260](https://github.com/yt-dlp/yt-dlp/issues/8260)) by [AS6939](https://github.com/AS6939) +- **lbry** + - [Add playlist support](https://github.com/yt-dlp/yt-dlp/commit/48cceec1ddb8649b5e771df8df79eb9c39c82b90) ([#8213](https://github.com/yt-dlp/yt-dlp/issues/8213)) by [bashonly](https://github.com/bashonly), [drzraf](https://github.com/drzraf), [Grub4K](https://github.com/Grub4K) + - [Extract `uploader_id`](https://github.com/yt-dlp/yt-dlp/commit/0e722f2f3ca42e634fd7b06ee70b16bf833ce132) ([#8244](https://github.com/yt-dlp/yt-dlp/issues/8244)) by [drzraf](https://github.com/drzraf) +- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/91a670a4f7babe9c8aa2018f57d8c8952a6f49d8) ([#7785](https://github.com/yt-dlp/yt-dlp/issues/7785)) by [jiru](https://github.com/jiru) +- **neteasemusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/f980df734cf5c0eaded2f7b38c6c60bccfeebb48) ([#8181](https://github.com/yt-dlp/yt-dlp/issues/8181)) by [c-basalt](https://github.com/c-basalt) +- **nhk**: [Fix VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/e831c80e8b2fc025b3b67d82974cc59e3526fdc8) ([#8249](https://github.com/yt-dlp/yt-dlp/issues/8249)) by [garret1317](https://github.com/garret1317) +- **radiko**: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2ad3873f0dfa9285c91d2160e36c039e69d597c7) ([#8221](https://github.com/yt-dlp/yt-dlp/issues/8221)) by 
[garret1317](https://github.com/garret1317) +- **substack** + - [Fix download cookies bug](https://github.com/yt-dlp/yt-dlp/commit/2f2dda3a7e85148773da3cdbc03ac9949ec1bc45) ([#8219](https://github.com/yt-dlp/yt-dlp/issues/8219)) by [handlerug](https://github.com/handlerug) + - [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/fbcc299bd8a19cf8b3c8805d6c268a9110230973) ([#8218](https://github.com/yt-dlp/yt-dlp/issues/8218)) by [handlerug](https://github.com/handlerug) +- **theta**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/792f1e64f6a2beac51e85408d142b3118115c4fd) ([#8251](https://github.com/yt-dlp/yt-dlp/issues/8251)) by [alerikaisattera](https://github.com/alerikaisattera) +- **wrestleuniversevod**: [Call API with device ID](https://github.com/yt-dlp/yt-dlp/commit/b095fd3fa9d58a65dc9b830bd63b9d909422aa86) ([#8272](https://github.com/yt-dlp/yt-dlp/issues/8272)) by [bashonly](https://github.com/bashonly) +- **xhamster**: user: [Support creator urls](https://github.com/yt-dlp/yt-dlp/commit/cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3) ([#8232](https://github.com/yt-dlp/yt-dlp/issues/8232)) by [Grub4K](https://github.com/Grub4K) +- **youtube** + - [Fix `heatmap` extraction](https://github.com/yt-dlp/yt-dlp/commit/03e85ea99db76a2fddb65bf46f8819bda780aaf3) ([#8299](https://github.com/yt-dlp/yt-dlp/issues/8299)) by [bashonly](https://github.com/bashonly) + - [Raise a warning for `Incomplete Data` instead of an error](https://github.com/yt-dlp/yt-dlp/commit/eb5bdbfa70126c7d5355cc0954b63720522e462c) ([#8238](https://github.com/yt-dlp/yt-dlp/issues/8238)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- **cleanup** + - [Update extractor tests](https://github.com/yt-dlp/yt-dlp/commit/19c90e405b4137c06dfe6f9aaa02396df0da93e5) ([#7718](https://github.com/yt-dlp/yt-dlp/issues/7718)) by [trainman261](https://github.com/trainman261) + - Miscellaneous: [377e85a](https://github.com/yt-dlp/yt-dlp/commit/377e85a1797db9e98b78b38203ed9d4ded229991) by [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K) + +### 2023.09.24 + +#### Important changes +- **The minimum *recommended* Python version has been raised to 3.8** +Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803) +- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg) + - The shell escape function is now using `""` instead of `\"`. + - `utils.Popen` has been patched to properly quote commands. 
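+
+  A minimal illustration (not yt-dlp's actual code; the helper names and payload are made up) of why this matters: `cmd.exe` does not treat backslash as an escape character, so a `%q`-quoted value escaped as `\"` could still close the quoted region and let shell metacharacters such as `&` execute. Doubling the quote keeps the whole value inside one quoted argument:
+
+  ```python
+  def escape_backslash(s):  # old, exploitable style
+      return '"' + s.replace('"', '\\"') + '"'
+
+  def escape_doubled(s):  # hardened style per the advisory: "" instead of \"
+      return '"' + s.replace('"', '""') + '"'
+
+  title = 'video" & calc.exe & "'  # attacker-controlled field, e.g. from %(title)q
+  print(escape_backslash(title))  # "video\" & calc.exe & \""  -> cmd.exe runs calc.exe
+  print(escape_doubled(title))    # "video"" & calc.exe & """  -> stays a single argument
+  ```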
+ +#### Core changes +- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan) +- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K) +- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz) +- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K) +- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K) +- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan) +- **compat** + - [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd)) + - [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) + - [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan) +- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly) +- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly) +- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan) +- **utils** + - [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan) + - [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly) + - HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan) + - `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah) + - `js_to_json`: [Handle `Array` 
objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move) + +#### Extractor changes +- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan) +- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move) +- **abematv** + - [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives) + - [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan) +- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg) +- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos) +- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly) +- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick) +- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305) +- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317) +- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick) +- **bilibili** + - [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt) + - [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22) + - [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by 
[GD-Slime](https://github.com/GD-Slime) +- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K) +- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0) +- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld) +- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261) +- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261) +- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317) +- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K) +- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt) +- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly)) +- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo) +- **facebook** + - [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1) + - [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) 
([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack) + - reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071) +- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20) +- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04) +- **generic** + - [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly) + - [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan) +- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001) + - [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly) + - [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly) +- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7) +- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01) +- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move) +- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly) +- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000) +- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly) +- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn) +- **media.ccc.de**: lists: [Fix 
extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16) +- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien) +- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato) +- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly) +- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317) +- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317) +- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak) +- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly) +- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly) +- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png) +- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly) +- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111) +- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly)) +- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move) +- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) 
([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly) +- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon) +- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG) +- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji) +- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands) +- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu) +- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K) +- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0) +- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000) +- **reddit** + - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly) + - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly) +- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly) +- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128) +- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee) +- **s4c** + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t) + - [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t) +- 
**sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt) +- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly) +- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera) +- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader) +- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317) +- **tiktok** + - [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly) +- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly) +- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli) +- **twitcasting** + - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt) + - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat) +- **twitter** + - [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly) + - [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly) + - [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly) + - [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan) + - spaces + - [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by 
[bashonly](https://github.com/bashonly) + - [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly) +- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan) +- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt) +- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel) +- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin) +- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg) +- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly) +- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt) +- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev) +- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa) +- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz) + - [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz) + - tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz) +- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **zee5**: [Update access token 
endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly) +- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **external** + - [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly) + - [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly) + +#### Postprocessor changes +- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic) + +#### Networking changes +- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan) +- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz) +- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz) +- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly) +- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly) +- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz) +- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz) +- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz) +- **Request Handler** + - urllib + - [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz) + - [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a)) + +#### Misc. 
changes +- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan) +- **cleanup** + - [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) + - Miscellaneous + - [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan) + - [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT) + - [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K) +- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K) +- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K) +- **test** + - [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan) + - download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat) + +### 2023.07.06 + +#### Important changes +- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj) + - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains + - Cookies are scoped when passed to external downloaders + - Add `cookies` field to info.json and deprecate `http_headers.Cookie`
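+
+  A hedged sketch of the safe pattern after this advisory (the filenames and URL are placeholders): pass cookies through a cookie jar, which is inherently scoped per domain, rather than a raw `Cookie` header that would previously be sent to every host, including redirect targets:
+
+  ```python
+  import yt_dlp
+
+  ydl_opts = {
+      'cookiefile': 'cookies.txt',  # Netscape-format cookie jar, scoped by domain
+      # Deprecated, and now auto-scoped to the input URL's domain:
+      # 'http_headers': {'Cookie': 'session=...'},
+  }
+  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+      ydl.download(['https://example.com/watch/video'])
+  ```
+
+#### Core changes
+- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
+- [Bugfix for b4e0d75848e9447cee2cd3646ce54d4744a7ff56](https://github.com/yt-dlp/yt-dlp/commit/e59e20744eb32ce4b6ea0dece7c673be8376a710) by [pukkandan](https://github.com/pukkandan)
+- [Change how `Cookie` headers are handled](https://github.com/yt-dlp/yt-dlp/commit/3121512228487c9c690d3d39bfd2579addf96e07) by [Grub4K](https://github.com/Grub4K)
+- [Prevent `Cookie` leaks on HTTP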
redirect](https://github.com/yt-dlp/yt-dlp/commit/f8b4bcc0a791274223723488bfbfc23ea3276641) by [coletdjnz](https://github.com/coletdjnz) +- **formats**: [Fix best fallback for storyboards](https://github.com/yt-dlp/yt-dlp/commit/906c0bdcd8974340d619e99ccd613c163eb0d0c2) by [pukkandan](https://github.com/pukkandan) +- **outtmpl**: [Pad `playlist_index` etc even when with internal formatting](https://github.com/yt-dlp/yt-dlp/commit/47bcd437247152e0af5b3ebc5592db7bb66855c2) by [pukkandan](https://github.com/pukkandan) +- **utils**: clean_podcast_url: [Handle protocol in redirect URL](https://github.com/yt-dlp/yt-dlp/commit/91302ed349f34dc26cc1d661bb45a4b71f4417f7) by [pukkandan](https://github.com/pukkandan) + +#### Extractor changes +- **abc**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8f05fbae2a79ce0713077ccc68b354e63216bf20) ([#7434](https://github.com/yt-dlp/yt-dlp/issues/7434)) by [meliber](https://github.com/meliber) +- **AdultSwim**: [Extract subtitles from m3u8](https://github.com/yt-dlp/yt-dlp/commit/5e16cf92eb496b7c1541a6b1d727cb87542984db) ([#7421](https://github.com/yt-dlp/yt-dlp/issues/7421)) by [nnoboa](https://github.com/nnoboa) +- **crunchyroll**: music: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5b4b92769afcc398475e481bfa839f1158902fe9) ([#7439](https://github.com/yt-dlp/yt-dlp/issues/7439)) by [AmanSal1](https://github.com/AmanSal1), [rdamas](https://github.com/rdamas) +- **Douyin**: [Fix extraction from webpage](https://github.com/yt-dlp/yt-dlp/commit/a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2) by [bashonly](https://github.com/bashonly) +- **googledrive**: [Fix source format extraction](https://github.com/yt-dlp/yt-dlp/commit/3b7f5300c577fef40464d46d4e4037a69d51fe82) ([#7395](https://github.com/yt-dlp/yt-dlp/issues/7395)) by [RfadnjdExt](https://github.com/RfadnjdExt) +- **kick**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/ef8509c300ea50da86aea447eb214d3d6f6db6bb) by [bashonly](https://github.com/bashonly) +- **qdance**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f0a1ff118145b6449982ba401f9a9f656ecd8062) ([#7420](https://github.com/yt-dlp/yt-dlp/issues/7420)) by [bashonly](https://github.com/bashonly) +- **sbs**: [Python 3.7 compat](https://github.com/yt-dlp/yt-dlp/commit/f393bbe724b1fc6c7f754a5da507e807b2b40ad2) by [pukkandan](https://github.com/pukkandan) +- **stacommu**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/af1fd12f675220df6793fc019dff320bc76e8080) ([#7432](https://github.com/yt-dlp/yt-dlp/issues/7432)) by [urectanc](https://github.com/urectanc) +- **twitter** + - [Fix unauthenticated extraction](https://github.com/yt-dlp/yt-dlp/commit/49296437a8e5fa91dacb5446e51ab588474c85d3) ([#7476](https://github.com/yt-dlp/yt-dlp/issues/7476)) by [bashonly](https://github.com/bashonly) + - spaces: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1cffd621cb371f1563563cfb2fe37d137e8a7bee) ([#7512](https://github.com/yt-dlp/yt-dlp/issues/7512)) by [bashonly](https://github.com/bashonly) +- **vidlii**: [Handle relative URLs](https://github.com/yt-dlp/yt-dlp/commit/ad8902f616ad2541f9b9626738f1393fad89a64c) by [pukkandan](https://github.com/pukkandan) +- **vk**: VKPlay, VKPlayLive: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/8776349ef6b1f644584a92dfa00a05208a48edc4) ([#7358](https://github.com/yt-dlp/yt-dlp/issues/7358)) by [c-basalt](https://github.com/c-basalt) +- **youtube** + - [Add extractor-arg `formats`](https://github.com/yt-dlp/yt-dlp/commit/58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2) 
by [pukkandan](https://github.com/pukkandan) + - [Avoid false DRM detection](https://github.com/yt-dlp/yt-dlp/commit/94ed638a437fc766699d440e978982e24ce6a30a) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan) + - [Fix comments' `is_favorited`](https://github.com/yt-dlp/yt-dlp/commit/89bed013741a776506f60380b7fd89d27d0710b4) ([#7390](https://github.com/yt-dlp/yt-dlp/issues/7390)) by [bbilly1](https://github.com/bbilly1) + - [Ignore incomplete data for comment threads by default](https://github.com/yt-dlp/yt-dlp/commit/4dc4d8473c085900edc841c87c20041233d25b1f) ([#7475](https://github.com/yt-dlp/yt-dlp/issues/7475)) by [coletdjnz](https://github.com/coletdjnz) + - [Process `post_live` over 2 hours](https://github.com/yt-dlp/yt-dlp/commit/d949c10c45bfc359bdacd52e6a180169b8128958) by [pukkandan](https://github.com/pukkandan) + - stories: [Remove](https://github.com/yt-dlp/yt-dlp/commit/90db9a3c00ca80492c6a58c542e4cbf4c2710866) ([#7459](https://github.com/yt-dlp/yt-dlp/issues/7459)) by [pukkandan](https://github.com/pukkandan) + - tab: [Support shorts-only playlists](https://github.com/yt-dlp/yt-dlp/commit/fcbc9ed760be6e3455bbadfaf277b4504b06f068) ([#7425](https://github.com/yt-dlp/yt-dlp/issues/7425)) by [coletdjnz](https://github.com/coletdjnz) + +#### Downloader changes +- **aria2c**: [Add `--no-conf`](https://github.com/yt-dlp/yt-dlp/commit/8a8af356e3bba98a7f7d333aff0777d5d92130c8) by [pukkandan](https://github.com/pukkandan) +- **external**: [Scope cookies](https://github.com/yt-dlp/yt-dlp/commit/1ceb657bdd254ad961489e5060f2ccc7d556b729) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) +- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan) + +#### Misc. 
changes +- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf) +- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan) +- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan) +- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan) + +### 2023.06.22 + +#### Core changes +- [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan) +- [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan) + - Support negative time-ranges + - Add `*from-url` to obey time-ranges in URL +- [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan) + +#### Extractor changes +- [Support multiple `_VALID_URL`s](https://github.com/yt-dlp/yt-dlp/commit/5fd8367496b42c7b900b896a0d5460561a2859de) ([#5812](https://github.com/yt-dlp/yt-dlp/issues/5812)) by [nixxo](https://github.com/nixxo) +- **dplay**: GlobalCyclingNetworkPlus: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/774aa09dd6aa61ced9ec818d1f67e53414d22762) ([#7360](https://github.com/yt-dlp/yt-dlp/issues/7360)) by [bashonly](https://github.com/bashonly) +- **dropout**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/db22142f6f817ff673d417b4b78e8db497bf8ab3) ([#7304](https://github.com/yt-dlp/yt-dlp/issues/7304)) by [OverlordQ](https://github.com/OverlordQ) +- **motherless**: [Add gallery support, fix groups](https://github.com/yt-dlp/yt-dlp/commit/f2ff0f6f1914b82d4a51681a72cc0828115dcb4a) ([#7211](https://github.com/yt-dlp/yt-dlp/issues/7211)) by [rexlambert22](https://github.com/rexlambert22), [Ti4eeT4e](https://github.com/Ti4eeT4e) +- **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb) +- **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk) +- **youtube** + - [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) by [pukkandan](https://github.com/pukkandan) + - iOS is affected neither by 403 errors nor by nsig throttling, so using it helps mitigate them preemptively + - iOS also has higher-bitrate 'premium' formats, though they are not labeled as such + - [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan) + - [Improve nsig function name
extraction](https://github.com/yt-dlp/yt-dlp/commit/cd810afe2ac5567c822b7424800fc470ef2d0045) by [pukkandan](https://github.com/pukkandan) + - [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan) + +#### Misc. changes +- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan) +- **cleanup** + - Miscellaneous + - [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly) + - [812cdfa](https://github.com/yt-dlp/yt-dlp/commit/812cdfa06c33a40e73a8e04b3e6f42c084666a43) by [pukkandan](https://github.com/pukkandan) + +### 2023.06.21 + +#### Important changes +- YouTube: Improved throttling and signature fixes + +#### Core changes +- [Add `--compat-option playlist-match-filter`](https://github.com/yt-dlp/yt-dlp/commit/93b39cdbd9dcf351bfa0c4ee252805b4617fdca9) by [pukkandan](https://github.com/pukkandan) +- [Add `--no-quiet`](https://github.com/yt-dlp/yt-dlp/commit/d669772c65e8630162fd6555d0a578b246591921) by [pukkandan](https://github.com/pukkandan) +- [Add option `--color`](https://github.com/yt-dlp/yt-dlp/commit/8417f26b8a819cd7ffcd4e000ca3e45033e670fb) ([#6904](https://github.com/yt-dlp/yt-dlp/issues/6904)) by [Grub4K](https://github.com/Grub4K) +- [Add option `--netrc-cmd`](https://github.com/yt-dlp/yt-dlp/commit/db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb) ([#6682](https://github.com/yt-dlp/yt-dlp/issues/6682)) by [NDagestad](https://github.com/NDagestad), [pukkandan](https://github.com/pukkandan) +- [Add option `--xff`](https://github.com/yt-dlp/yt-dlp/commit/c16644642b08e2bf4130a6c5fa01395d8718c990) by [pukkandan](https://github.com/pukkandan) +- [Auto-select default format in `-f-`](https://github.com/yt-dlp/yt-dlp/commit/372a0f3b9dadd1e52234b498aa4c7040ef868c7d) ([#7101](https://github.com/yt-dlp/yt-dlp/issues/7101)) by [ivanskodje](https://github.com/ivanskodje), [pukkandan](https://github.com/pukkandan) +- [Deprecate internal `Youtubedl-no-compression` header](https://github.com/yt-dlp/yt-dlp/commit/955c89584b66fcd0fcfab3e611f1edeb1ca63886) ([#6876](https://github.com/yt-dlp/yt-dlp/issues/6876)) by [coletdjnz](https://github.com/coletdjnz) +- [Do not translate newlines in `--print-to-file`](https://github.com/yt-dlp/yt-dlp/commit/9874e82b5a61582169300bea561b3e8899ad1ef7) by [pukkandan](https://github.com/pukkandan) +- [Ensure pre-processor errors do not block `--print`](https://github.com/yt-dlp/yt-dlp/commit/f005a35aa7e4f67a0c603a946c0dd714c151b2d6) by [pukkandan](https://github.com/pukkandan) (With fixes in [17ba434](https://github.com/yt-dlp/yt-dlp/commit/17ba4343cf99701692a7f4798fd42b50f644faba)) +- [Fix `filepath` being copied to underlying format dict](https://github.com/yt-dlp/yt-dlp/commit/84078a8b38f403495d00b46654c8750774d821de) by [pukkandan](https://github.com/pukkandan) +- [Improve HTTP redirect handling](https://github.com/yt-dlp/yt-dlp/commit/08916a49c777cb6e000eec092881eb93ec22076c) ([#7094](https://github.com/yt-dlp/yt-dlp/issues/7094)) by [coletdjnz](https://github.com/coletdjnz) +- [Populate `filename` and `urls` fields at all stages of `--print`](https://github.com/yt-dlp/yt-dlp/commit/170605840ea9d5ad75da6576485ea7d125b428ee) by [pukkandan](https://github.com/pukkandan) (With fixes in 
+
+#### Misc. changes
+- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan)
+- **cleanup**
+    - Miscellaneous
+        - [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly)
+        - [812cdfa](https://github.com/yt-dlp/yt-dlp/commit/812cdfa06c33a40e73a8e04b3e6f42c084666a43) by [pukkandan](https://github.com/pukkandan)
+
+### 2023.06.21
+
+#### Important changes
+- YouTube: Improved throttling and signature fixes
+
+#### Core changes
+- [Add `--compat-option playlist-match-filter`](https://github.com/yt-dlp/yt-dlp/commit/93b39cdbd9dcf351bfa0c4ee252805b4617fdca9) by [pukkandan](https://github.com/pukkandan)
+- [Add `--no-quiet`](https://github.com/yt-dlp/yt-dlp/commit/d669772c65e8630162fd6555d0a578b246591921) by [pukkandan](https://github.com/pukkandan)
+- [Add option `--color`](https://github.com/yt-dlp/yt-dlp/commit/8417f26b8a819cd7ffcd4e000ca3e45033e670fb) ([#6904](https://github.com/yt-dlp/yt-dlp/issues/6904)) by [Grub4K](https://github.com/Grub4K)
+- [Add option `--netrc-cmd`](https://github.com/yt-dlp/yt-dlp/commit/db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb) ([#6682](https://github.com/yt-dlp/yt-dlp/issues/6682)) by [NDagestad](https://github.com/NDagestad), [pukkandan](https://github.com/pukkandan) (see the example after this list)
+- [Add option `--xff`](https://github.com/yt-dlp/yt-dlp/commit/c16644642b08e2bf4130a6c5fa01395d8718c990) by [pukkandan](https://github.com/pukkandan)
+- [Auto-select default format in `-f-`](https://github.com/yt-dlp/yt-dlp/commit/372a0f3b9dadd1e52234b498aa4c7040ef868c7d) ([#7101](https://github.com/yt-dlp/yt-dlp/issues/7101)) by [ivanskodje](https://github.com/ivanskodje), [pukkandan](https://github.com/pukkandan)
+- [Deprecate internal `Youtubedl-no-compression` header](https://github.com/yt-dlp/yt-dlp/commit/955c89584b66fcd0fcfab3e611f1edeb1ca63886) ([#6876](https://github.com/yt-dlp/yt-dlp/issues/6876)) by [coletdjnz](https://github.com/coletdjnz)
+- [Do not translate newlines in `--print-to-file`](https://github.com/yt-dlp/yt-dlp/commit/9874e82b5a61582169300bea561b3e8899ad1ef7) by [pukkandan](https://github.com/pukkandan)
+- [Ensure pre-processor errors do not block `--print`](https://github.com/yt-dlp/yt-dlp/commit/f005a35aa7e4f67a0c603a946c0dd714c151b2d6) by [pukkandan](https://github.com/pukkandan) (With fixes in [17ba434](https://github.com/yt-dlp/yt-dlp/commit/17ba4343cf99701692a7f4798fd42b50f644faba))
+- [Fix `filepath` being copied to underlying format dict](https://github.com/yt-dlp/yt-dlp/commit/84078a8b38f403495d00b46654c8750774d821de) by [pukkandan](https://github.com/pukkandan)
+- [Improve HTTP redirect handling](https://github.com/yt-dlp/yt-dlp/commit/08916a49c777cb6e000eec092881eb93ec22076c) ([#7094](https://github.com/yt-dlp/yt-dlp/issues/7094)) by [coletdjnz](https://github.com/coletdjnz)
+- [Populate `filename` and `urls` fields at all stages of `--print`](https://github.com/yt-dlp/yt-dlp/commit/170605840ea9d5ad75da6576485ea7d125b428ee) by [pukkandan](https://github.com/pukkandan) (With fixes in [b5f61b6](https://github.com/yt-dlp/yt-dlp/commit/b5f61b69d4561b81fc98c226b176f0c15493e688))
+- [Relaxed validation for numeric format filters](https://github.com/yt-dlp/yt-dlp/commit/c3f624ef0a5d7a6ae1c5ffeb243087e9fc7d79dc) by [pukkandan](https://github.com/pukkandan)
+- [Support decoding multiple content encodings](https://github.com/yt-dlp/yt-dlp/commit/daafbf49b3482edae4d70dd37070be99742a926e) ([#7142](https://github.com/yt-dlp/yt-dlp/issues/7142)) by [coletdjnz](https://github.com/coletdjnz)
+- [Support loading info.json with a list at its root](https://github.com/yt-dlp/yt-dlp/commit/ab1de9cb1e39cf421c2b7dc6756c6ff1955bb313) by [pukkandan](https://github.com/pukkandan)
+- [Workaround erroneous urllib Windows proxy parsing](https://github.com/yt-dlp/yt-dlp/commit/3f66b6fe50f8d5b545712f8b19d5ae62f5373980) ([#7092](https://github.com/yt-dlp/yt-dlp/issues/7092)) by [coletdjnz](https://github.com/coletdjnz)
+- **cookies**
+    - [Defer extraction of v11 key from keyring](https://github.com/yt-dlp/yt-dlp/commit/9b7a48abd1b187eae1e3f6c9839c47d43ccec00b) by [Grub4K](https://github.com/Grub4K)
+    - [Move `YoutubeDLCookieJar` to cookies module](https://github.com/yt-dlp/yt-dlp/commit/b87e01c123fd560b6a674ce00f45a9459d82d98a) ([#7091](https://github.com/yt-dlp/yt-dlp/issues/7091)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Support custom Safari cookies path](https://github.com/yt-dlp/yt-dlp/commit/a58182b75a05fe0a10c5e94a536711d3ade19c20) ([#6783](https://github.com/yt-dlp/yt-dlp/issues/6783)) by [NextFire](https://github.com/NextFire)
+    - [Update for chromium changes](https://github.com/yt-dlp/yt-dlp/commit/b38d4c941d1993ab27e4c0f8e024e23c2ec0f8f8) ([#6897](https://github.com/yt-dlp/yt-dlp/issues/6897)) by [mbway](https://github.com/mbway)
+- **Cryptodome**: [Fix `__bool__`](https://github.com/yt-dlp/yt-dlp/commit/98ac902c4979e4529b166e873473bef42baa2e3e) by [pukkandan](https://github.com/pukkandan)
+- **jsinterp**
+    - [Do not compile regex](https://github.com/yt-dlp/yt-dlp/commit/7aeda6cc9e73ada0b0a0b6a6748c66bef63a20a8) by [pukkandan](https://github.com/pukkandan)
+    - [Fix division](https://github.com/yt-dlp/yt-dlp/commit/b4a252fba81f53631c07ca40ce7583f5d19a8a36) ([#7279](https://github.com/yt-dlp/yt-dlp/issues/7279)) by [bashonly](https://github.com/bashonly)
+    - [Fix global object extraction](https://github.com/yt-dlp/yt-dlp/commit/01aba2519a0884ef17d5f85608dbd2a455577147) by [pukkandan](https://github.com/pukkandan)
+    - [Handle `NaN` in bitwise operators](https://github.com/yt-dlp/yt-dlp/commit/1d7656184c6b8aa46b29149893894b3c24f1df00) by [pukkandan](https://github.com/pukkandan)
+    - [Handle negative numbers better](https://github.com/yt-dlp/yt-dlp/commit/7cf51f21916292cd80bdeceb37489f5322f166dd) by [pukkandan](https://github.com/pukkandan)
+- **outtmpl**
+    - [Allow `\n` in replacements and default](https://github.com/yt-dlp/yt-dlp/commit/78fde6e3398ff11e5d383a66b28664badeab5180) by [pukkandan](https://github.com/pukkandan)
+    - [Fix some minor bugs](https://github.com/yt-dlp/yt-dlp/commit/ebe1b4e34f43c3acad30e4bcb8484681a030c114) by [pukkandan](https://github.com/pukkandan) (With fixes in [1619ab3](https://github.com/yt-dlp/yt-dlp/commit/1619ab3e67d8dc4f86fc7ed292c79345bc0d91a0))
+    - [Support `str.format` syntax inside replacements](https://github.com/yt-dlp/yt-dlp/commit/ec9311c41b111110bc52cfbd6ea682c6fb23f77a) by [pukkandan](https://github.com/pukkandan)
+- **update**
+    - [Better error handling](https://github.com/yt-dlp/yt-dlp/commit/d2e84d5eb01c66fc5304e8566348d65a7be24ed7) by [pukkandan](https://github.com/pukkandan)
+    - [Do not restart into versions without `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/02948a17d903f544363bb20b51a6d8baed7bba08) by [pukkandan](https://github.com/pukkandan)
+    - [Implement `--update-to` repo](https://github.com/yt-dlp/yt-dlp/commit/665472a7de3880578c0b7b3f95c71570c056368e) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- **upstream**
+    - [Merged with youtube-dl 07af47](https://github.com/yt-dlp/yt-dlp/commit/42f2d40b475db66486a4b4fe5b56751a640db5db) by [pukkandan](https://github.com/pukkandan)
+    - [Merged with youtube-dl d1c6c5](https://github.com/yt-dlp/yt-dlp/commit/4823ec9f461512daa1b8ab362893bb86a6320b26) by [pukkandan](https://github.com/pukkandan) (With fixes in [edbe5b5](https://github.com/yt-dlp/yt-dlp/commit/edbe5b589dd0860a67b4e03f58db3cd2539d91c2) by [bashonly](https://github.com/bashonly))
+- **utils**
+    - `FormatSorter`: [Improve `size` and `br`](https://github.com/yt-dlp/yt-dlp/commit/eedda5252c05327748dede204a8fccafa0288118) by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
+    - `js_to_json`: [Implement template strings](https://github.com/yt-dlp/yt-dlp/commit/0898c5c8ccadfc404472456a7a7751b72afebadd) ([#6623](https://github.com/yt-dlp/yt-dlp/issues/6623)) by [Grub4K](https://github.com/Grub4K)
+    - `locked_file`: [Fix for virtiofs](https://github.com/yt-dlp/yt-dlp/commit/45998b3e371b819ce0dbe50da703809a048cc2fe) ([#6840](https://github.com/yt-dlp/yt-dlp/issues/6840)) by [brandon-dacrib](https://github.com/brandon-dacrib)
+    - `strftime_or_none`: [Handle negative timestamps](https://github.com/yt-dlp/yt-dlp/commit/a35af4306d24c56c6358f89cdf204860d1cd62b4) by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+    - `traverse_obj`
+        - [Allow iterables in traversal](https://github.com/yt-dlp/yt-dlp/commit/21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e) ([#6902](https://github.com/yt-dlp/yt-dlp/issues/6902)) by [Grub4K](https://github.com/Grub4K)
+        - [More fixes](https://github.com/yt-dlp/yt-dlp/commit/b079c26f0af8085bccdadc72c61c8164ca5ab0f8) ([#6959](https://github.com/yt-dlp/yt-dlp/issues/6959)) by [Grub4K](https://github.com/Grub4K)
+    - `write_string`: [Fix noconsole behavior](https://github.com/yt-dlp/yt-dlp/commit/3b479100df02e20dd949e046003ae96ddbfced57) by [Grub4K](https://github.com/Grub4K)
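+
+A sketch of the new `--netrc-cmd` option; the command and file path are illustrative, and the command is assumed to print credentials in netrc format on stdout:
+```sh
+# Fetch credentials from an encrypted netrc file instead of a plaintext ~/.netrc
+yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' "https://example.com/video"
+```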
+
+#### Extractor changes
+- [Do not exit early for unsuitable `url_result`](https://github.com/yt-dlp/yt-dlp/commit/baa922b5c74b10e3b86ff5e6cf6529b3aae8efab) by [pukkandan](https://github.com/pukkandan)
+- [Do not warn for invalid chapter data in description](https://github.com/yt-dlp/yt-dlp/commit/84ffeb7d5e72e3829319ba7720a8480fc4c7503b) by [pukkandan](https://github.com/pukkandan)
+- [Extract more metadata from ISM](https://github.com/yt-dlp/yt-dlp/commit/f68434cc74cfd3db01b266476a2eac8329fbb267) by [pukkandan](https://github.com/pukkandan)
+- **abematv**: [Add fallback for title and description extraction and extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/c449c0655d7c8549e6e1389c26b628053b253d39) ([#6994](https://github.com/yt-dlp/yt-dlp/issues/6994)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **acast**: [Support embeds](https://github.com/yt-dlp/yt-dlp/commit/c91ac833ea99b00506e470a44cf930e4e23378c9) ([#7212](https://github.com/yt-dlp/yt-dlp/issues/7212)) by [pabs3](https://github.com/pabs3)
+- **adobepass**: [Handle `Charter_Direct` MSO as `Spectrum`](https://github.com/yt-dlp/yt-dlp/commit/ea0570820336a0fe9c3b530d1b0d1e59313274f4) ([#6824](https://github.com/yt-dlp/yt-dlp/issues/6824)) by [bashonly](https://github.com/bashonly)
+- **aeonco**: [Support YouTube embeds](https://github.com/yt-dlp/yt-dlp/commit/ed81b74802b4247ee8d9dc0ef87eb52baefede1c) ([#6591](https://github.com/yt-dlp/yt-dlp/issues/6591)) by [alexklapheke](https://github.com/alexklapheke)
+- **afreecatv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fdd69db38924c38194ef236b26325d66ac815c88) ([#6283](https://github.com/yt-dlp/yt-dlp/issues/6283)) by [blmarket](https://github.com/blmarket)
+- **ARDBetaMediathek**: [Add thumbnail](https://github.com/yt-dlp/yt-dlp/commit/f78eb41e1c0f1dcdb10317358a26bf541dc7ee15) ([#6890](https://github.com/yt-dlp/yt-dlp/issues/6890)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+- **bibeltv**: [Fix extraction, support live streams and series](https://github.com/yt-dlp/yt-dlp/commit/4ad58667c102bd82a7c4cca8aa395ec1682e3b4c) ([#6505](https://github.com/yt-dlp/yt-dlp/issues/6505)) by [flashdagger](https://github.com/flashdagger)
+- **bilibili**
+    - [Support festival videos](https://github.com/yt-dlp/yt-dlp/commit/ab29e47029e2f5b48abbbab78e82faf7cf6e9506) ([#6547](https://github.com/yt-dlp/yt-dlp/issues/6547)) by [qbnu](https://github.com/qbnu)
+    - SpaceVideo: [Extract signature](https://github.com/yt-dlp/yt-dlp/commit/6f10cdcf7eeaeae5b75e0a4428cd649c156a2d83) ([#7149](https://github.com/yt-dlp/yt-dlp/issues/7149)) by [elyse0](https://github.com/elyse0)
+- **biliIntl**: [Add comment extraction](https://github.com/yt-dlp/yt-dlp/commit/b093c38cc9f26b59a8504211d792f053142c847d) ([#6079](https://github.com/yt-dlp/yt-dlp/issues/6079)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **bitchute**: [Add more fallback subdomains](https://github.com/yt-dlp/yt-dlp/commit/0c4e0fbcade0fc92d14c2a6d63e360fe067f6192) ([#6907](https://github.com/yt-dlp/yt-dlp/issues/6907)) by [Neurognostic](https://github.com/Neurognostic)
+- **booyah**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f7f7a877bf8e87fd4eb0ad2494ad948ca7691114) by [pukkandan](https://github.com/pukkandan)
+- **BrainPOP**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/979568f26ece80bca72b48f0dd57d676e431059a) ([#6106](https://github.com/yt-dlp/yt-dlp/issues/6106)) by [MinePlayersPE](https://github.com/MinePlayersPE)
+- **bravotv**
+    - [Detect DRM](https://github.com/yt-dlp/yt-dlp/commit/1fe5bf240e6ade487d18079a62aa36bcc440a27a) ([#7171](https://github.com/yt-dlp/yt-dlp/issues/7171)) by [bashonly](https://github.com/bashonly)
+    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/06966cb8966b9aa4f60ab9c44c182a057d4ca3a3) ([#6568](https://github.com/yt-dlp/yt-dlp/issues/6568)) by [bashonly](https://github.com/bashonly)
+- **camfm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4cbfa570a1b9bd65b0f48770693377e8d842dcb0) ([#7083](https://github.com/yt-dlp/yt-dlp/issues/7083)) by [garret1317](https://github.com/garret1317)
+- **cbc**
+    - [Fix live extractor, playlist `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/7a7b1376fbce0067cf37566bb47131bc0022638d) ([#6625](https://github.com/yt-dlp/yt-dlp/issues/6625)) by [makew0rld](https://github.com/makew0rld)
+    - [Ignore 426 from API](https://github.com/yt-dlp/yt-dlp/commit/4afb208cf07b59291ae3b0c4efc83945ee5b8812) ([#6781](https://github.com/yt-dlp/yt-dlp/issues/6781)) by [jo-nike](https://github.com/jo-nike)
+    - gem: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/871c907454693940cb56906ed9ea49fcb7154829) ([#6499](https://github.com/yt-dlp/yt-dlp/issues/6499)) by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+- **cbs**: [Add `ParamountPressExpress` extractor](https://github.com/yt-dlp/yt-dlp/commit/44369c9afa996e14e9f466754481d878811b5b4a) ([#6604](https://github.com/yt-dlp/yt-dlp/issues/6604)) by [bashonly](https://github.com/bashonly)
+- **cbsnews**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/f6e43d6fa9804c24525e1fed0a87782754dab7ed) ([#6681](https://github.com/yt-dlp/yt-dlp/issues/6681)) by [bashonly](https://github.com/bashonly)
+- **chilloutzone**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6f4fc5660f40f3458882a8f51601eae4af7be609) ([#6445](https://github.com/yt-dlp/yt-dlp/issues/6445)) by [bashonly](https://github.com/bashonly)
+- **clipchamp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f07c4c1da4361af213e5791279b9d152d2e4ce3) ([#6978](https://github.com/yt-dlp/yt-dlp/issues/6978)) by [bashonly](https://github.com/bashonly)
+- **comedycentral**: [Add support for movies](https://github.com/yt-dlp/yt-dlp/commit/66468bbf49562ff82670cbbd456c5e8448a6df34) ([#7108](https://github.com/yt-dlp/yt-dlp/issues/7108)) by [sqrtNOT](https://github.com/sqrtNOT)
+- **crtvg**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/26c517b29c8727e47948d6fff749d5297f0efb60) ([#7168](https://github.com/yt-dlp/yt-dlp/issues/7168)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **crunchyroll**: [Rework with support for movies, music and artists](https://github.com/yt-dlp/yt-dlp/commit/032de83ea9ff2f4977d9c71a93bbc1775597b762) ([#6237](https://github.com/yt-dlp/yt-dlp/issues/6237)) by [Grub4K](https://github.com/Grub4K)
+- **dacast**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/c25cac2f8e5fbac2737a426d7778fd2f0efc5381) ([#6896](https://github.com/yt-dlp/yt-dlp/issues/6896)) by [bashonly](https://github.com/bashonly)
+- **daftsex**: [Update domain and embed player URL](https://github.com/yt-dlp/yt-dlp/commit/fc5a7f9b27d2a89b1f3ca7d33a95301c21d832cd) ([#5966](https://github.com/yt-dlp/yt-dlp/issues/5966)) by [JChris246](https://github.com/JChris246)
+- **DigitalConcertHall**: [Support films](https://github.com/yt-dlp/yt-dlp/commit/55ed4ff73487feb3177b037dfc2ea527e777da3e) ([#7202](https://github.com/yt-dlp/yt-dlp/issues/7202)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **discogs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6daaf21092888beff11b807cd46f832f1f9c46a0) ([#6624](https://github.com/yt-dlp/yt-dlp/issues/6624)) by [rjy](https://github.com/rjy)
+- **dlf**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b423b6a48e0b19260bc95ab7d72d2138d7f124dc) ([#6697](https://github.com/yt-dlp/yt-dlp/issues/6697)) by [nick-cd](https://github.com/nick-cd)
+- **drtv**: [Fix radio page extraction](https://github.com/yt-dlp/yt-dlp/commit/9a06b7b1891b48cebbe275652ae8025a36d97d97) ([#6552](https://github.com/yt-dlp/yt-dlp/issues/6552)) by [viktor-enzell](https://github.com/viktor-enzell)
+- **Dumpert**: [Fix m3u8 and support new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/f8ae441501596733e2b967430471643a1d7cacb8) ([#6091](https://github.com/yt-dlp/yt-dlp/issues/6091)) by [DataGhost](https://github.com/DataGhost), [pukkandan](https://github.com/pukkandan)
+- **elevensports**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ecfe47973f6603b5367fe2cc3c65274627d94516) ([#7172](https://github.com/yt-dlp/yt-dlp/issues/7172)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **ettutv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/83465fc4100a2fb2c188898fbc2f3021f6a9b4dd) ([#6579](https://github.com/yt-dlp/yt-dlp/issues/6579)) by [elyse0](https://github.com/elyse0)
+- **europarl**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/03789976d301eaed3e957dbc041573098f6af059) ([#7114](https://github.com/yt-dlp/yt-dlp/issues/7114)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **eurosport**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/45e87ea106ad37b2a002663fa30ee41ce97b16cd) ([#7076](https://github.com/yt-dlp/yt-dlp/issues/7076)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **facebook**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/3b52a606881e6adadc33444abdeacce562b79330) ([#6856](https://github.com/yt-dlp/yt-dlp/issues/6856)) by [ringus1](https://github.com/ringus1)
+- **foxnews**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/97d60ad8cd6c99f01e463a9acfce8693aff2a609) ([#7222](https://github.com/yt-dlp/yt-dlp/issues/7222)) by [bashonly](https://github.com/bashonly)
+- **funker530**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cab94a0cd8b6d3fffed5a6faff030274adbed182) ([#7291](https://github.com/yt-dlp/yt-dlp/issues/7291)) by [Cyberes](https://github.com/Cyberes)
+- **generic**
+    - [Accept values for `fragment_query`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/5cc0a8fd2e9fec50026fb92170b57993af939e4a) ([#6600](https://github.com/yt-dlp/yt-dlp/issues/6600)) by [bashonly](https://github.com/bashonly) (With fixes in [9bfe0d1](https://github.com/yt-dlp/yt-dlp/commit/9bfe0d15bd7dbdc6b0e6378fa9f5e2e289b2373b)) (see the example after this list)
+    - [Add extractor-args `hls_key`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/c2e0fc40a73dd85ab3920f977f579d475e66ef59) ([#6567](https://github.com/yt-dlp/yt-dlp/issues/6567)) by [bashonly](https://github.com/bashonly)
+    - [Attempt to detect live HLS](https://github.com/yt-dlp/yt-dlp/commit/93e7c6995e07dafb9dcc06c0d06acf6c5bdfecc5) ([#6775](https://github.com/yt-dlp/yt-dlp/issues/6775)) by [bashonly](https://github.com/bashonly)
+- **genius**: [Add support for articles](https://github.com/yt-dlp/yt-dlp/commit/460da07439718d9af1e3661da2a23e05a913a2e6) ([#6474](https://github.com/yt-dlp/yt-dlp/issues/6474)) by [bashonly](https://github.com/bashonly)
+- **globalplayer**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/30647668a92a0ca5cd108776804baac0996bd9f7) ([#6903](https://github.com/yt-dlp/yt-dlp/issues/6903)) by [garret1317](https://github.com/garret1317)
+- **gmanetwork**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2d97d154fe4fb84fe2ed3a4e1ed5819e89b71e88) ([#5945](https://github.com/yt-dlp/yt-dlp/issues/5945)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **gronkh**: [Extract duration and chapters](https://github.com/yt-dlp/yt-dlp/commit/9c92b803fa24e48543ce969468d5404376e315b7) ([#6817](https://github.com/yt-dlp/yt-dlp/issues/6817)) by [satan1st](https://github.com/satan1st)
+- **hentaistigma**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/04f8018a0544736a18494bc3899d06b05b78fae6) by [pukkandan](https://github.com/pukkandan)
+- **hidive**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/e6ab678e36c40ded0aae305bbb866cdab554d417) by [pukkandan](https://github.com/pukkandan)
+- **hollywoodreporter**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6bdb64e2a2a6d504d8ce1dc830fbfb8a7f199c63) ([#6614](https://github.com/yt-dlp/yt-dlp/issues/6614)) by [bashonly](https://github.com/bashonly)
+- **hotstar**: [Support `/shows/` URLs](https://github.com/yt-dlp/yt-dlp/commit/7f8ddebbb51c9fd4a347306332a718ba41b371b8) ([#7225](https://github.com/yt-dlp/yt-dlp/issues/7225)) by [bashonly](https://github.com/bashonly)
+- **hrefli**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7e35526d5b970a034b9d76215ee3e4bd7631edcd) ([#6762](https://github.com/yt-dlp/yt-dlp/issues/6762)) by [selfisekai](https://github.com/selfisekai)
+- **idolplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c14b213679ed4401288bdc86ae696932e219222) ([#6732](https://github.com/yt-dlp/yt-dlp/issues/6732)) by [ping](https://github.com/ping)
+- **iq**: [Set more language codes](https://github.com/yt-dlp/yt-dlp/commit/2d5cae9636714ff922d28c548c349d5f2b48f317) ([#6476](https://github.com/yt-dlp/yt-dlp/issues/6476)) by [D0LLYNH0](https://github.com/D0LLYNH0)
+- **iwara**
+    - [Accept old URLs](https://github.com/yt-dlp/yt-dlp/commit/ab92d8651c48d247dfb7d3f0a824cc986e47c7ed) by [Lesmiscore](https://github.com/Lesmiscore)
+    - [Fix authentication](https://github.com/yt-dlp/yt-dlp/commit/0a5d7c39e17bb9bd50c9db42bcad40eb82d7f784) ([#7137](https://github.com/yt-dlp/yt-dlp/issues/7137)) by [toomyzoom](https://github.com/toomyzoom)
+    - [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/56793f74c36899742d7abd52afb0deca97d469e1) ([#6651](https://github.com/yt-dlp/yt-dlp/issues/6651)) by [hasezoey](https://github.com/hasezoey)
+    - [Fix typo](https://github.com/yt-dlp/yt-dlp/commit/d1483ec693c79f0b4ddf493870bcb840aca4da08) by [Lesmiscore](https://github.com/Lesmiscore)
+    - [Implement login](https://github.com/yt-dlp/yt-dlp/commit/21b9413cf7dd4830b2ece57af21589dd4538fc52) ([#6721](https://github.com/yt-dlp/yt-dlp/issues/6721)) by [toomyzoom](https://github.com/toomyzoom)
+    - [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/c14af7a741931b364bab3d9546c0f4359f318f8c) ([#6557](https://github.com/yt-dlp/yt-dlp/issues/6557)) by [Lesmiscore](https://github.com/Lesmiscore)
+    - [Report private videos](https://github.com/yt-dlp/yt-dlp/commit/95a383be1b6fb00c92ee3fb091732c4f6009acb6) ([#6641](https://github.com/yt-dlp/yt-dlp/issues/6641)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **JStream**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3459d3c5af3b2572ed51e8ecfda6c11022a838c6) ([#6252](https://github.com/yt-dlp/yt-dlp/issues/6252)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **jwplatform**: [Update `_extract_embed_urls`](https://github.com/yt-dlp/yt-dlp/commit/cf9fd52fabe71d6e7c30d3ea525029ffa561fc9c) ([#6383](https://github.com/yt-dlp/yt-dlp/issues/6383)) by [carusocr](https://github.com/carusocr)
+- **kick**: [Make initial request non-fatal](https://github.com/yt-dlp/yt-dlp/commit/0a6918a4a1431960181d8c50e0bbbcb0afbaff9a) by [bashonly](https://github.com/bashonly)
+- **LastFM**: [Rewrite playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/026435714cb7c39613a0d7d2acd15d3823b78d94) ([#6379](https://github.com/yt-dlp/yt-dlp/issues/6379)) by [hatienl0i261299](https://github.com/hatienl0i261299), [pukkandan](https://github.com/pukkandan)
+- **lbry**: [Extract original quality formats](https://github.com/yt-dlp/yt-dlp/commit/44c0d66442b568d9e1359e669d8b029b08a77fa7) ([#7257](https://github.com/yt-dlp/yt-dlp/issues/7257)) by [bashonly](https://github.com/bashonly)
+- **line**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/faa0332ed69e070cf3bd31390589a596e962f392) ([#6734](https://github.com/yt-dlp/yt-dlp/issues/6734)) by [sian1468](https://github.com/sian1468)
+- **livestream**: [Support videos with account id](https://github.com/yt-dlp/yt-dlp/commit/bfdf144c7e5d7a93fbfa9d8e65598c72bf2b542a) ([#6324](https://github.com/yt-dlp/yt-dlp/issues/6324)) by [theperfectpunk](https://github.com/theperfectpunk)
+- **medaltv**: [Fix clips](https://github.com/yt-dlp/yt-dlp/commit/1e3c2b6ec28d7ab5e31341fa93c47b65be4fbff4) ([#6502](https://github.com/yt-dlp/yt-dlp/issues/6502)) by [xenova](https://github.com/xenova)
+- **mediastream**: [Improve `WinSports` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/03025b6e105139d01cd415ddc51fd692957fd2ba) ([#6426](https://github.com/yt-dlp/yt-dlp/issues/6426)) by [bashonly](https://github.com/bashonly)
+- **mgtv**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/59d9fe08312bbb76ee26238d207a8ca35410a48d) ([#7234](https://github.com/yt-dlp/yt-dlp/issues/7234)) by [bashonly](https://github.com/bashonly)
+- **Mzaalo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dc3c44f349ba85af320e706e2a27ad81a78b1c6e) ([#7163](https://github.com/yt-dlp/yt-dlp/issues/7163)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **nbc**: [Fix `NBCStations` direct mp4 formats](https://github.com/yt-dlp/yt-dlp/commit/9be0fe1fd967f62cbf3c60bd14e1021a70abc147) ([#6637](https://github.com/yt-dlp/yt-dlp/issues/6637)) by [bashonly](https://github.com/bashonly)
+- **nebula**: [Add `beta.nebula.tv`](https://github.com/yt-dlp/yt-dlp/commit/cbfe2e5cbe0f4649a91e323a82b8f5f774f36662) ([#6516](https://github.com/yt-dlp/yt-dlp/issues/6516)) by [unbeatable-101](https://github.com/unbeatable-101)
+- **nekohacker**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/489f51279d00318018478fd7461eddbe3b45297e) ([#7003](https://github.com/yt-dlp/yt-dlp/issues/7003)) by [hasezoey](https://github.com/hasezoey)
+- **nhk**
+    - [Add `NhkRadiru` extractor](https://github.com/yt-dlp/yt-dlp/commit/8f0be90ecb3b8d862397177bb226f17b245ef933) ([#6819](https://github.com/yt-dlp/yt-dlp/issues/6819)) by [garret1317](https://github.com/garret1317)
+    - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/f41b949a2ef646fbc36375febbe3f0c19d742c0f) ([#7180](https://github.com/yt-dlp/yt-dlp/issues/7180)) by [menschel](https://github.com/menschel), [sjthespian](https://github.com/sjthespian)
+    - `NhkRadiruLive`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/81c8b9bdd9841b72cbfc1bbff9dab5fb4aa038b0) ([#7332](https://github.com/yt-dlp/yt-dlp/issues/7332)) by [garret1317](https://github.com/garret1317)
+- **niconico**
+    - [Download comments from the new endpoint](https://github.com/yt-dlp/yt-dlp/commit/52ecc33e221f7de7eb6fed6c22489f0c5fdd2c6d) ([#6773](https://github.com/yt-dlp/yt-dlp/issues/6773)) by [Lesmiscore](https://github.com/Lesmiscore)
+    - live: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8f9250fe280d37f0988646cd5cc0072f4d33a6d) ([#5764](https://github.com/yt-dlp/yt-dlp/issues/5764)) by [Lesmiscore](https://github.com/Lesmiscore)
+    - series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/c86e433c35fe5da6cb29f3539eef97497f84ed38) ([#6898](https://github.com/yt-dlp/yt-dlp/issues/6898)) by [sqrtNOT](https://github.com/sqrtNOT)
+- **nubilesporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d4e6ef40772e0560a8ed33b844ef7549e86837be) ([#6231](https://github.com/yt-dlp/yt-dlp/issues/6231)) by [permunkle](https://github.com/permunkle)
+- **odnoklassniki**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/1a2eb5bda51d8b7a78a65acebf72a0dcf9da196b) ([#7217](https://github.com/yt-dlp/yt-dlp/issues/7217)) by [bashonly](https://github.com/bashonly)
+- **opencast**
+    - [Add ltitools to `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3588be59cee429a0ab5c4ceb2f162298bb44147d) ([#6371](https://github.com/yt-dlp/yt-dlp/issues/6371)) by [C0D3D3V](https://github.com/C0D3D3V)
+    - [Fix format bug](https://github.com/yt-dlp/yt-dlp/commit/89dbf0848370deaa55af88c3593a2a264124caf5) ([#6512](https://github.com/yt-dlp/yt-dlp/issues/6512)) by [C0D3D3V](https://github.com/C0D3D3V)
+- **owncloud**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c6d4b82a8b8bce59b1c9ce5e6d349ea428dac0a7) ([#6533](https://github.com/yt-dlp/yt-dlp/issues/6533)) by [C0D3D3V](https://github.com/C0D3D3V)
+- **Parler**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/80ea6d3dea8483cddd39fc89b5ee1fc06670c33c) ([#6446](https://github.com/yt-dlp/yt-dlp/issues/6446)) by [JChris246](https://github.com/JChris246)
+- **pgatour**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae182ad89e1427ff7b1684d6a44ff93fa857a0c) ([#6613](https://github.com/yt-dlp/yt-dlp/issues/6613)) by [bashonly](https://github.com/bashonly)
+- **playsuisse**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/94627c5dde12a72766bdba36e056916c29c40ed1) ([#6528](https://github.com/yt-dlp/yt-dlp/issues/6528)) by [sbor23](https://github.com/sbor23)
+- **polskieradio**: [Improve extractors](https://github.com/yt-dlp/yt-dlp/commit/738c90a463257634455ada3e5c18b714c531dede) ([#5948](https://github.com/yt-dlp/yt-dlp/issues/5948)) by [selfisekai](https://github.com/selfisekai)
+- **pornez**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/cbdf9408e6f1e35e98fd6477b3d6902df5b8a47f) ([#6792](https://github.com/yt-dlp/yt-dlp/issues/6792)) by [zhgwn](https://github.com/zhgwn)
+- **pornhub**: [Set access cookies to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/62beefa818c75c20b6941389bb197051554a5d41) ([#6685](https://github.com/yt-dlp/yt-dlp/issues/6685)) by [arobase-che](https://github.com/arobase-che), [Schmoaaaaah](https://github.com/Schmoaaaaah)
+- **rai**: [Rewrite extractors](https://github.com/yt-dlp/yt-dlp/commit/c6d3f81a4077aaf9cffc6aa2d0dec92f38e74bb0) ([#5940](https://github.com/yt-dlp/yt-dlp/issues/5940)) by [danog](https://github.com/danog), [nixxo](https://github.com/nixxo)
+- **recurbate**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2502cfed91415c7ccfff925fd3404d230046484) ([#6297](https://github.com/yt-dlp/yt-dlp/issues/6297)) by [mrscrapy](https://github.com/mrscrapy)
+- **reddit**
+    - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/4d9280c9c853733534dda60486fa949bcca36c9e) ([#6950](https://github.com/yt-dlp/yt-dlp/issues/6950)) by [bashonly](https://github.com/bashonly)
+    - [Support cookies and short URLs](https://github.com/yt-dlp/yt-dlp/commit/7a6f6f24592a8065376f11a58e44878807732cf6) ([#6825](https://github.com/yt-dlp/yt-dlp/issues/6825)) by [bashonly](https://github.com/bashonly)
+- **rokfin**: [Re-construct manifest URL](https://github.com/yt-dlp/yt-dlp/commit/7a6c8a0807941dd24fbf0d6172e811884f98e027) ([#6507](https://github.com/yt-dlp/yt-dlp/issues/6507)) by [vampirefrog](https://github.com/vampirefrog)
+- **rottentomatoes**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2d306c03d6f2697fcbabb7da35aa62cc078359d3) ([#6844](https://github.com/yt-dlp/yt-dlp/issues/6844)) by [JChris246](https://github.com/JChris246)
+- **rozhlas**
+    - [Extract manifest formats](https://github.com/yt-dlp/yt-dlp/commit/e4cf7741f9302b3faa092962f2895b55cb3d89bb) ([#6590](https://github.com/yt-dlp/yt-dlp/issues/6590)) by [bashonly](https://github.com/bashonly)
+    - `MujRozhlas`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2b801fea59628d5c873e06a0727fbf2051bbd1f) ([#7129](https://github.com/yt-dlp/yt-dlp/issues/7129)) by [stanoarn](https://github.com/stanoarn)
+- **rtvc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/9b30cd3dfce83c2f0201b28a7a3ef44ab9722664) ([#6578](https://github.com/yt-dlp/yt-dlp/issues/6578)) by [elyse0](https://github.com/elyse0)
+- **rumble**
+    - [Detect timeline format](https://github.com/yt-dlp/yt-dlp/commit/78bc1868ff3352108ab2911033d1ac67a55f151e) by [pukkandan](https://github.com/pukkandan)
+    - [Fix videos without quality selection](https://github.com/yt-dlp/yt-dlp/commit/6994afc030d2a786d8032075ed71a14d7eac5a4f) by [pukkandan](https://github.com/pukkandan)
+- **sbs**: [Overhaul extractor for new API](https://github.com/yt-dlp/yt-dlp/commit/6a765f135ccb654861336ea27a2c1c24ea8e286f) ([#6839](https://github.com/yt-dlp/yt-dlp/issues/6839)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [vidiot720](https://github.com/vidiot720)
+- **shemaroome**: [Pass `stream_key` header to downloader](https://github.com/yt-dlp/yt-dlp/commit/7bc92517463f5766e9d9b92c3823b5cf403c0e3d) ([#7224](https://github.com/yt-dlp/yt-dlp/issues/7224)) by [bashonly](https://github.com/bashonly)
+- **sonyliv**: [Fix login with token](https://github.com/yt-dlp/yt-dlp/commit/4815d35c191e7d375b94492a6486dd2ba43a8954) ([#7223](https://github.com/yt-dlp/yt-dlp/issues/7223)) by [bashonly](https://github.com/bashonly)
+- **stageplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e5265dc6517478e589ee3c1ff0cb19bdf4e35ce1) ([#6838](https://github.com/yt-dlp/yt-dlp/issues/6838)) by [bashonly](https://github.com/bashonly)
+- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f9213f8a2d7ba46b912afe1dd3ce6bb700a33d72) ([#7306](https://github.com/yt-dlp/yt-dlp/issues/7306)) by [foreignBlade](https://github.com/foreignBlade)
+- **substack**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/12037d8b0a578fcc78a5c8f98964e48ee6060e25) ([#7218](https://github.com/yt-dlp/yt-dlp/issues/7218)) by [bashonly](https://github.com/bashonly)
+- **sverigesradio**: [Support slug URLs](https://github.com/yt-dlp/yt-dlp/commit/5ee9a7d6e18ceea956e831994cf11c423979354f) ([#7220](https://github.com/yt-dlp/yt-dlp/issues/7220)) by [bashonly](https://github.com/bashonly)
+- **tagesschau**: [Fix single audio URLs](https://github.com/yt-dlp/yt-dlp/commit/af7585c824a1e405bd8afa46d87b4be322edc93c) ([#6626](https://github.com/yt-dlp/yt-dlp/issues/6626)) by [flashdagger](https://github.com/flashdagger)
+- **teamcoco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c459d45dd4d417fb80a52e1a04e607776a44baa4) ([#6437](https://github.com/yt-dlp/yt-dlp/issues/6437)) by [bashonly](https://github.com/bashonly)
+- **telecaribe**: [Expand livestream support](https://github.com/yt-dlp/yt-dlp/commit/69b2f838d3d3e37dc17367ef64d978db1bea45cf) ([#6601](https://github.com/yt-dlp/yt-dlp/issues/6601)) by [bashonly](https://github.com/bashonly)
+- **tencent**: [Fix fatal metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/971d901d129403e875a04dd92109507a03fbc070) ([#7219](https://github.com/yt-dlp/yt-dlp/issues/7219)) by [bashonly](https://github.com/bashonly)
+- **thesun**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0181b9a1b31db3fde943f7cd3fe9662f23bff292) ([#6522](https://github.com/yt-dlp/yt-dlp/issues/6522)) by [hatienl0i261299](https://github.com/hatienl0i261299)
+- **tiktok**
+    - [Extract 1080p adaptive formats](https://github.com/yt-dlp/yt-dlp/commit/c2a1bdb00931969193f2a31ea27b9c66a07aaec2) ([#7228](https://github.com/yt-dlp/yt-dlp/issues/7228)) by [bashonly](https://github.com/bashonly)
+    - [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/925936908a3c3ee0e508621db14696b9f6a8b563) ([#6777](https://github.com/yt-dlp/yt-dlp/issues/6777)) by [bashonly](https://github.com/bashonly)
+    - [Fix mp3 formats](https://github.com/yt-dlp/yt-dlp/commit/8ceb07e870424c219dced8f4348729553f05c5cc) ([#6615](https://github.com/yt-dlp/yt-dlp/issues/6615)) by [bashonly](https://github.com/bashonly)
+    - [Fix resolution extraction](https://github.com/yt-dlp/yt-dlp/commit/ab6057ec80aa75db6303b8206916d00c376c622c) ([#7237](https://github.com/yt-dlp/yt-dlp/issues/7237)) by [puc9](https://github.com/puc9)
+    - [Improve `TikTokLive` extractor](https://github.com/yt-dlp/yt-dlp/commit/216bcb66d7dce0762767d751dad10650cb57da9d) ([#6520](https://github.com/yt-dlp/yt-dlp/issues/6520)) by [bashonly](https://github.com/bashonly)
+- **triller**: [Support short URLs, detect removed videos](https://github.com/yt-dlp/yt-dlp/commit/33b737bedf8383c0d00d4e1d06a5273dcdfdb756) ([#6636](https://github.com/yt-dlp/yt-dlp/issues/6636)) by [bashonly](https://github.com/bashonly)
+- **tv4**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/125ffaa1737dd04716f2f6fbb0595ad3eb7a4b1c) ([#5649](https://github.com/yt-dlp/yt-dlp/issues/5649)) by [dirkf](https://github.com/dirkf), [TxI5](https://github.com/TxI5)
+- **tvp**: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/0c7ce146e4d2a84e656d78f6857952bfd25ab389) ([#6989](https://github.com/yt-dlp/yt-dlp/issues/6989)) by [selfisekai](https://github.com/selfisekai)
+- **tvplay**: [Remove outdated domains](https://github.com/yt-dlp/yt-dlp/commit/937264419f9bf375d5656785ae6e53282587c15d) ([#7106](https://github.com/yt-dlp/yt-dlp/issues/7106)) by [ivanskodje](https://github.com/ivanskodje)
+- **twitch**
+    - [Extract original size thumbnail](https://github.com/yt-dlp/yt-dlp/commit/80b732b7a9585b2a61e456dc0d2d014a439cbaee) ([#6629](https://github.com/yt-dlp/yt-dlp/issues/6629)) by [JC-Chung](https://github.com/JC-Chung)
+    - [Fix `is_live`](https://github.com/yt-dlp/yt-dlp/commit/0551511b45f7847f40e4314aa9e624e80d086539) ([#6500](https://github.com/yt-dlp/yt-dlp/issues/6500)) by [elyse0](https://github.com/elyse0)
+    - [Support mobile clips](https://github.com/yt-dlp/yt-dlp/commit/02312c03cf53eb1da24c9ad022ee79af26060733) ([#6699](https://github.com/yt-dlp/yt-dlp/issues/6699)) by [bepvte](https://github.com/bepvte)
+    - [Update `_CLIENT_ID` and add extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/01231feb142e80828985aabdec04ac608e3d43e2) ([#7200](https://github.com/yt-dlp/yt-dlp/issues/7200)) by [bashonly](https://github.com/bashonly)
+    - vod: [Support links from schedule tab](https://github.com/yt-dlp/yt-dlp/commit/dbce5afa6bb61f6272ade613f2e9a3d66b88c7ea) ([#7071](https://github.com/yt-dlp/yt-dlp/issues/7071)) by [falbrechtskirchinger](https://github.com/falbrechtskirchinger)
+- **twitter**
+    - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/d1795f4a6af99c976c9d3ea2dabe5cf4f8965d3c) ([#7258](https://github.com/yt-dlp/yt-dlp/issues/7258)) by [bashonly](https://github.com/bashonly)
+    - [Default to GraphQL, handle auth errors](https://github.com/yt-dlp/yt-dlp/commit/147e62fc584c3ea6fdb09bb7a47905df68553a22) ([#6957](https://github.com/yt-dlp/yt-dlp/issues/6957)) by [bashonly](https://github.com/bashonly)
+    - spaces: [Add `release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/1c16d9df5330819cc79ad588b24aa5b72765c168) ([#7186](https://github.com/yt-dlp/yt-dlp/issues/7186)) by [CeruleanSky](https://github.com/CeruleanSky)
+- **urplay**: [Extract all subtitles](https://github.com/yt-dlp/yt-dlp/commit/7bcd4813215ac98daa4949af2ffc677c78307a38) ([#7309](https://github.com/yt-dlp/yt-dlp/issues/7309)) by [hoaluvn](https://github.com/hoaluvn)
+- **voot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4f7b11cc1c1cebf598107e00cd7295588ed484da) ([#7227](https://github.com/yt-dlp/yt-dlp/issues/7227)) by [bashonly](https://github.com/bashonly)
+- **vrt**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/1a7dcca378e80a387923ee05c250d8ba122441c6) ([#6244](https://github.com/yt-dlp/yt-dlp/issues/6244)) by [bashonly](https://github.com/bashonly), [bergoid](https://github.com/bergoid), [jeroenj](https://github.com/jeroenj)
+- **weverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b844a3f8b16500663e7ab6c6ec061cc9b30f71ac) ([#6711](https://github.com/yt-dlp/yt-dlp/issues/6711)) by [bashonly](https://github.com/bashonly) (With fixes in [fd5d93f](https://github.com/yt-dlp/yt-dlp/commit/fd5d93f7040f9776fd541f4e4079dad7d3b3fb4f))
+- **wevidi**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1ea15603d852971ed7d92f4de12808b27b3d9370) ([#6868](https://github.com/yt-dlp/yt-dlp/issues/6868)) by [truedread](https://github.com/truedread)
+- **weyyak**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6dc00acf0f1f1107a626c21befd1691403e6aeeb) ([#7124](https://github.com/yt-dlp/yt-dlp/issues/7124)) by [ItzMaxTV](https://github.com/ItzMaxTV)
+- **whyp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2c566ed14101673c651c08c306c30fa5b4010b85) ([#6803](https://github.com/yt-dlp/yt-dlp/issues/6803)) by [CoryTibbettsDev](https://github.com/CoryTibbettsDev)
+- **wrestleuniverse**
+    - [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/c8561c6d03f025268d6d3972abeb47987c8d7cbb) by [bashonly](https://github.com/bashonly)
+    - [Fix extraction, add login](https://github.com/yt-dlp/yt-dlp/commit/ef8fb7f029b816dfc95600727d84400591a3b5c5) ([#6982](https://github.com/yt-dlp/yt-dlp/issues/6982)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+- **wykop**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/aed945e1b9b7d3af2a907e1a12e6508cc81d6a20) ([#6140](https://github.com/yt-dlp/yt-dlp/issues/6140)) by [selfisekai](https://github.com/selfisekai)
+- **ximalaya**: [Sort playlist entries](https://github.com/yt-dlp/yt-dlp/commit/8790ea7b2536332777bce68590386b1aa935fac7) ([#7292](https://github.com/yt-dlp/yt-dlp/issues/7292)) by [linsui](https://github.com/linsui)
+- **YahooGyaOIE, YahooGyaOPlayerIE**: [Delete extractors due to website closure](https://github.com/yt-dlp/yt-dlp/commit/68be95bd0ca3f76aa63c9812935bd826b3a42e53) ([#6218](https://github.com/yt-dlp/yt-dlp/issues/6218)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **yappy**: YappyProfile: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6f69101dc912690338d32e2aab085c32e44eba3f) ([#7346](https://github.com/yt-dlp/yt-dlp/issues/7346)) by [7vlad7](https://github.com/7vlad7)
+- **youku**: [Improve error message](https://github.com/yt-dlp/yt-dlp/commit/ef0848abd425dfda6db62baa8d72897eefb0007f) ([#6690](https://github.com/yt-dlp/yt-dlp/issues/6690)) by [carusocr](https://github.com/carusocr)
+- **youporn**: [Extract m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/ddae33754ae1f32dd9c64cf895c47d20f6b5f336) by [pukkandan](https://github.com/pukkandan)
+- **youtube**
+    - [Add client name to `format_note` when `-v`](https://github.com/yt-dlp/yt-dlp/commit/c795c39f27244cbce846067891827e4847036441) ([#6254](https://github.com/yt-dlp/yt-dlp/issues/6254)) by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
+    - [Add extractor-arg `include_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/86cb922118b236306310a72657f70426c20e28bb) by [pukkandan](https://github.com/pukkandan)
+    - [Bypass throttling for `-f17`](https://github.com/yt-dlp/yt-dlp/commit/c9abebb851e6188cb34b9eb744c1863dd46af919) by [pukkandan](https://github.com/pukkandan)
+    - [Construct fragment list lazily](https://github.com/yt-dlp/yt-dlp/commit/2a23d92d9ec44a0168079e38bcf3d383e5c4c7bb) by [pukkandan](https://github.com/pukkandan) (With fixes in [e389d17](https://github.com/yt-dlp/yt-dlp/commit/e389d172b6f42e4f332ae679dc48543fb7b9b61d))
+    - [Define strict uploader metadata mapping](https://github.com/yt-dlp/yt-dlp/commit/7666b93604b97e9ada981c6b04ccf5605dd1bd44) ([#6384](https://github.com/yt-dlp/yt-dlp/issues/6384)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Determine audio language using automatic captions](https://github.com/yt-dlp/yt-dlp/commit/ff9b0e071ffae5543cc309e6f9e647ac51e5846e) by [pukkandan](https://github.com/pukkandan)
+    - [Extract `channel_is_verified`](https://github.com/yt-dlp/yt-dlp/commit/8213ce28a485e200f6a7e1af1434a987c8e702bd) ([#7213](https://github.com/yt-dlp/yt-dlp/issues/7213)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Extract `heatmap` data](https://github.com/yt-dlp/yt-dlp/commit/5caf30dbc34f10b0be60676fece635b5c59f0d72) ([#7100](https://github.com/yt-dlp/yt-dlp/issues/7100)) by [tntmod54321](https://github.com/tntmod54321)
+    - [Extract more metadata for comments](https://github.com/yt-dlp/yt-dlp/commit/c35448b7b14113b35c4415dbfbf488c4731f006f) ([#7179](https://github.com/yt-dlp/yt-dlp/issues/7179)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Extract uploader metadata for feed/playlist items](https://github.com/yt-dlp/yt-dlp/commit/93e12ed76ef49252dc6869b59d21d0777e5e11af) by [coletdjnz](https://github.com/coletdjnz)
+    - [Fix comment loop detection for pinned comments](https://github.com/yt-dlp/yt-dlp/commit/141a8dff98874a426d7fbe772e0a8421bb42656f) ([#6714](https://github.com/yt-dlp/yt-dlp/issues/6714)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Fix continuation loop with no comments](https://github.com/yt-dlp/yt-dlp/commit/18f8fba7c89a87f99cc3313a1795848867e84fff) ([#7148](https://github.com/yt-dlp/yt-dlp/issues/7148)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Fix parsing `comment_count`](https://github.com/yt-dlp/yt-dlp/commit/071670cbeaa01ddf2cc20a95ae6da25f8f086431) ([#6523](https://github.com/yt-dlp/yt-dlp/issues/6523)) by [nick-cd](https://github.com/nick-cd)
+    - [Handle incomplete initial data from watch page](https://github.com/yt-dlp/yt-dlp/commit/607510b9f2f67bfe7d33d74031a5c1fe22a24862) ([#6510](https://github.com/yt-dlp/yt-dlp/issues/6510)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Ignore wrong fps of some formats](https://github.com/yt-dlp/yt-dlp/commit/97afb093d4cbe5df889145afa5f9ede4535e93e4) by [pukkandan](https://github.com/pukkandan)
+    - [Misc cleanup](https://github.com/yt-dlp/yt-dlp/commit/14a14335b280766fbf5a469ae26836d6c1fe450a) by [coletdjnz](https://github.com/coletdjnz)
+    - [Prioritize premium formats](https://github.com/yt-dlp/yt-dlp/commit/51a07b0dca4c079d58311c19b6d1c097c24bb021) by [pukkandan](https://github.com/pukkandan)
+    - [Revert default formats to `https`](https://github.com/yt-dlp/yt-dlp/commit/c6786ff3baaf72a5baa4d56d34058e54cbcf8ceb) by [pukkandan](https://github.com/pukkandan)
+    - [Support podcasts and releases tabs](https://github.com/yt-dlp/yt-dlp/commit/447afb9eaa65bc677e3245c83e53a8e69c174a3c) by [coletdjnz](https://github.com/coletdjnz)
+    - [Support shorter relative time format](https://github.com/yt-dlp/yt-dlp/commit/2fb35f6004c7625f0dd493da4a5abf0690f7777c) ([#7191](https://github.com/yt-dlp/yt-dlp/issues/7191)) by [coletdjnz](https://github.com/coletdjnz)
+    - music_search_url: [Extract title](https://github.com/yt-dlp/yt-dlp/commit/69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2) ([#7102](https://github.com/yt-dlp/yt-dlp/issues/7102)) by [kangalio](https://github.com/kangalio)
+- **zaiko**
+    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/345b4c0aedd9d19898ce00d5cef35fe0d277a052) ([#7254](https://github.com/yt-dlp/yt-dlp/issues/7254)) by [c-basalt](https://github.com/c-basalt)
+    - ZaikoETicket: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5cc09c004bd5edbbada9b041c08a720cadc4f4df) ([#7347](https://github.com/yt-dlp/yt-dlp/issues/7347)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **zdf**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ee0ed0338df328cd986f97315c8162b5a151476d) by [bashonly](https://github.com/bashonly)
+- **zee5**: [Fix extraction of new content](https://github.com/yt-dlp/yt-dlp/commit/9d7fde89a40360396f0baa2ee8bf507f92108b32) ([#7280](https://github.com/yt-dlp/yt-dlp/issues/7280)) by [bashonly](https://github.com/bashonly)
+- **zingmp3**: [Fix and improve extractors](https://github.com/yt-dlp/yt-dlp/commit/17d7ca84ea723c20668bd9bfa938be7ea0e64f6b) ([#6367](https://github.com/yt-dlp/yt-dlp/issues/6367)) by [hatienl0i261299](https://github.com/hatienl0i261299)
+- **zoom**
+    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/79c77e85b70ae3b9942d5a88c14d021a9bd24222) ([#6741](https://github.com/yt-dlp/yt-dlp/issues/6741)) by [shreyasminocha](https://github.com/shreyasminocha)
+    - [Fix share URL extraction](https://github.com/yt-dlp/yt-dlp/commit/90c1f5120694105496a6ad9e3ecfc6c25de6cae1) ([#6789](https://github.com/yt-dlp/yt-dlp/issues/6789)) by [bashonly](https://github.com/bashonly)
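+
+A hedged sketch of the `generic` extractor-args named above; the token and manifest URL are made up, and the exact passthrough behavior is as described in the README:
+```sh
+# Apply a query string to each fragment URL of a generic HLS/DASH manifest
+yt-dlp --extractor-args "generic:fragment_query=token=example" "https://example.com/manifest.m3u8"
+```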
+
+#### Downloader changes
+- **curl**: [Fix progress reporting](https://github.com/yt-dlp/yt-dlp/commit/66aeaac9aa30b5959069ba84e53a5508232deb38) by [pukkandan](https://github.com/pukkandan)
+- **fragment**: [Do not sleep between fragments](https://github.com/yt-dlp/yt-dlp/commit/424f3bf03305088df6e01d62f7311be8601ad3f4) by [pukkandan](https://github.com/pukkandan)
+
+#### Postprocessor changes
+- [Fix chapters if duration is not extracted](https://github.com/yt-dlp/yt-dlp/commit/01ddec7e661bf90dc4c34e6924eb9d7629886cef) ([#6037](https://github.com/yt-dlp/yt-dlp/issues/6037)) by [bashonly](https://github.com/bashonly)
+- [Print newline for `--progress-template`](https://github.com/yt-dlp/yt-dlp/commit/13ff78095372fd98900a32572cf817994c07ccb5) by [pukkandan](https://github.com/pukkandan)
+- **EmbedThumbnail, FFmpegMetadata**: [Fix error on attaching thumbnails and info json for mkv/mka](https://github.com/yt-dlp/yt-dlp/commit/0f0875ed555514f32522a0f30554fb08825d5124) ([#6647](https://github.com/yt-dlp/yt-dlp/issues/6647)) by [Lesmiscore](https://github.com/Lesmiscore)
+- **FFmpegFixupM3u8PP**: [Check audio codec before fixup](https://github.com/yt-dlp/yt-dlp/commit/3f7e2bd80e3c5d8a1682f20a1b245fcd974f295d) ([#6778](https://github.com/yt-dlp/yt-dlp/issues/6778)) by [bashonly](https://github.com/bashonly)
+- **FixupDuplicateMoov**: [Fix bug in triggering](https://github.com/yt-dlp/yt-dlp/commit/26010b5cec50193b98ad7845d1d77450f9f14c2b) by [pukkandan](https://github.com/pukkandan)
+
+#### Misc. changes
+- [Add automatic duplicate issue detection](https://github.com/yt-dlp/yt-dlp/commit/15b2d3db1d40b0437fca79d8874d392aa54b3cdd) by [pukkandan](https://github.com/pukkandan)
+- **build**
+    - [Fix macOS target](https://github.com/yt-dlp/yt-dlp/commit/44a79958f0b596ee71e1eb25f158610aada29d1b) by [Grub4K](https://github.com/Grub4K)
+    - [Implement build verification using `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/b73193c99aa23b135732408a5fcf655c68d731c6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+    - [Pin `pyinstaller` version for macOS](https://github.com/yt-dlp/yt-dlp/commit/427a8fafbb0e18c28d0ed7960be838d7b26b88d3) by [pukkandan](https://github.com/pukkandan)
+    - [Various build workflow improvements](https://github.com/yt-dlp/yt-dlp/commit/c4efa0aefec8daef1de62fd1693f13edf3c8b03c) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+- **cleanup**
+    - Miscellaneous
+        - [6f2287c](https://github.com/yt-dlp/yt-dlp/commit/6f2287cb18cbfb27518f068d868fa9390fee78ad) by [pukkandan](https://github.com/pukkandan)
+        - [ad54c91](https://github.com/yt-dlp/yt-dlp/commit/ad54c9130e793ce433bf9da334fa80df9f3aee58) by [freezboltz](https://github.com/freezboltz), [mikf](https://github.com/mikf), [pukkandan](https://github.com/pukkandan)
+- **cleanup, utils**: [Split into submodules](https://github.com/yt-dlp/yt-dlp/commit/69bec6730ec9d724bcedeab199d9d684d61423ba) ([#7090](https://github.com/yt-dlp/yt-dlp/issues/7090)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+- **cli_to_api**: [Add script](https://github.com/yt-dlp/yt-dlp/commit/46f1370e9af6f8af8762f67e27e5acb8f0c48a47) by [pukkandan](https://github.com/pukkandan) (see the example after this list)
+- **devscripts**: `make_changelog`: [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/23c39a4beadee382060bb47fdaa21316ca707d38) by [Grub4K](https://github.com/Grub4K)
+- **docs**: [Misc improvements](https://github.com/yt-dlp/yt-dlp/commit/c8bc203fbf3bb09914e53f0833eed622ab7edbb9) by [pukkandan](https://github.com/pukkandan)
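+
+The new helper script translates CLI options into the equivalent `YoutubeDL` API parameters for people embedding yt-dlp in Python; a minimal sketch (the flags are arbitrary and the exact output format may differ):
+```sh
+# Print the YoutubeDL params corresponding to these CLI flags
+python devscripts/cli_to_api.py -f bestvideo+bestaudio --embed-metadata
+```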
+
+### 2023.03.04
+
+#### Extractor changes
+- bilibili
+    - [Fix for downloading wrong subtitles](https://github.com/yt-dlp/yt-dlp/commit/8a83baaf218ab89e6e7faa76b7c7be3a2ec19e3a) ([#6358](https://github.com/yt-dlp/yt-dlp/issues/6358)) by [LXYan2333](https://github.com/LXYan2333)
+- ESPNcricinfo
+    - [Handle new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/640c934823fc2d1ec77ec932566078014058635f) ([#6321](https://github.com/yt-dlp/yt-dlp/issues/6321)) by [venkata-krishnas](https://github.com/venkata-krishnas)
+- lefigaro
+    - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/eb8fd6d044e8926532772b72be0645c6b8ecb3aa) ([#6309](https://github.com/yt-dlp/yt-dlp/issues/6309)) by [elyse0](https://github.com/elyse0)
+- lumni
+    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1f8489cccbdc6e96027ef527b88717458f0900e8) ([#6302](https://github.com/yt-dlp/yt-dlp/issues/6302)) by [carusocr](https://github.com/carusocr)
+- Prankcast
+    - [Fix tags](https://github.com/yt-dlp/yt-dlp/commit/ed4cc4ea793314c50ae3f82e98248c1de1c25694) ([#6316](https://github.com/yt-dlp/yt-dlp/issues/6316)) by [columndeeply](https://github.com/columndeeply)
+- rutube
+    - [Extract chapters from description](https://github.com/yt-dlp/yt-dlp/commit/22ccd5420b3eb0782776071f12cccd1fedaa1fd0) ([#6345](https://github.com/yt-dlp/yt-dlp/issues/6345)) by [mushbite](https://github.com/mushbite)
+- SportDeutschland
+    - [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/45db357289b4e1eec09093c8bc5446520378f426) by [pukkandan](https://github.com/pukkandan)
+- telecaribe
+    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b40471282286bd2b09c485bf79afd271d229272c) ([#6311](https://github.com/yt-dlp/yt-dlp/issues/6311)) by [elyse0](https://github.com/elyse0)
+- tubetugraz
+    - [Support `--twofactor` (#6424)](https://github.com/yt-dlp/yt-dlp/commit/f44cb4e77bb9be8be291d02ab6f79dc0b4c0d4a1) ([#6427](https://github.com/yt-dlp/yt-dlp/issues/6427)) by [Ferdi265](https://github.com/Ferdi265)
+- tunein
+    - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/46580ced56c90b559885aded6aa8f46f20a9cdce) ([#6310](https://github.com/yt-dlp/yt-dlp/issues/6310)) by [elyse0](https://github.com/elyse0)
+- twitch
+    - [Update for GraphQL API changes](https://github.com/yt-dlp/yt-dlp/commit/4a6272c6d1bff89969b67cd22b26ebe6d7e72279) ([#6318](https://github.com/yt-dlp/yt-dlp/issues/6318)) by [elyse0](https://github.com/elyse0)
+- twitter
+    - [Fix retweet extraction](https://github.com/yt-dlp/yt-dlp/commit/cf605226521e99c89fc8dff26a319025810e63a0) ([#6422](https://github.com/yt-dlp/yt-dlp/issues/6422)) by [selfisekai](https://github.com/selfisekai)
+- xvideos
+    - quickies: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/283a0b5bc511f3b350eead4488158f50c20ec526) ([#6414](https://github.com/yt-dlp/yt-dlp/issues/6414)) by [Yakabuff](https://github.com/Yakabuff)
+
+#### Misc. changes
+- build
+    - [Fix publishing to PyPI and Homebrew](https://github.com/yt-dlp/yt-dlp/commit/55676fe498345a389a2539d8baaba958d6d61c3e) by [bashonly](https://github.com/bashonly)
+    - [Only archive if `vars.ARCHIVE_REPO` is set](https://github.com/yt-dlp/yt-dlp/commit/08ff6d59f97b5f5f0128f6bf6fbef56fd836cc52) by [Grub4K](https://github.com/Grub4K)
+- cleanup
+    - Miscellaneous: [392389b](https://github.com/yt-dlp/yt-dlp/commit/392389b7df7b818f794b231f14dc396d4875fbad) by [pukkandan](https://github.com/pukkandan)
+- devscripts
+    - `make_changelog`: [Stop at `Release ...` commit](https://github.com/yt-dlp/yt-dlp/commit/7accdd9845fe7ce9d0aa5a9d16faaa489c1294eb) by [pukkandan](https://github.com/pukkandan)
+
+### 2023.03.03
+
+#### Important changes
+- **A new release type has been added!**
+    * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).
+    * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).
+    * The `--update-to` option has been added, allowing the user more control over program upgrades (or downgrades).
+    * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.
+    * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG` (see the example after this list)
+- **YouTube throttling fixes!**
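+
+A short sketch of `--update-to`, using the channels described above and a tag taken from this changelog (any release tag works):
+```sh
+# Switch a stable binary to the nightly channel
+yt-dlp --update-to nightly
+# Pin to a specific tag on the stable channel (downgrading is allowed)
+yt-dlp --update-to stable@2023.02.17
+```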
([#6402](https://github.com/yt-dlp/yt-dlp/issues/6402)) by [bashonly](https://github.com/bashonly) +- youtube + - [Construct dash formats with `range` query](https://github.com/yt-dlp/yt-dlp/commit/5038f6d713303e0967d002216e7a88652401c22a) by [pukkandan](https://github.com/pukkandan) (With fixes in [f34804b](https://github.com/yt-dlp/yt-dlp/commit/f34804b2f920f62a6e893a14a9e2a2144b14dd23) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)) + - [Detect and break on looping comments](https://github.com/yt-dlp/yt-dlp/commit/7f51861b1820c37b157a239b1fe30628d907c034) ([#6301](https://github.com/yt-dlp/yt-dlp/issues/6301)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract channel `view_count` when `/about` tab is passed](https://github.com/yt-dlp/yt-dlp/commit/31e183557fcd1b937582f9429f29207c1261f501) by [pukkandan](https://github.com/pukkandan) + +#### Misc. changes +- build + - [Add `cffi` as a dependency for `yt_dlp_linux`](https://github.com/yt-dlp/yt-dlp/commit/776d1c3f0c9b00399896dd2e40e78e9a43218109) by [bashonly](https://github.com/bashonly) + - [Automated builds and nightly releases](https://github.com/yt-dlp/yt-dlp/commit/29cb20bd563c02671b31dd840139e93dd37150a1) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [bfc861a](https://github.com/yt-dlp/yt-dlp/commit/bfc861a91ee65c9b0ac169754f512e052c6827cf) by [pukkandan](https://github.com/pukkandan)) + - [Sign SHA files and release public key](https://github.com/yt-dlp/yt-dlp/commit/12647e03d417feaa9ea6a458bea5ebd747494a53) by [Grub4K](https://github.com/Grub4K) +- cleanup + - [Fix `Changelog`](https://github.com/yt-dlp/yt-dlp/commit/17ca19ab60a6a13eb8a629c51442b5248b0d8394) by [pukkandan](https://github.com/pukkandan) + - jsinterp: [Give functions names to help debugging](https://github.com/yt-dlp/yt-dlp/commit/b2e0343ba0fc5d8702e90f6ba2b71358e2677e0b) by [pukkandan](https://github.com/pukkandan) + - Miscellaneous: [4815bbf](https://github.com/yt-dlp/yt-dlp/commit/4815bbfc41cf641e4a0650289dbff968cb3bde76), [5b28cef](https://github.com/yt-dlp/yt-dlp/commit/5b28cef72db3b531680d89c121631c73ae05354f) by [pukkandan](https://github.com/pukkandan) +- devscripts + - [Script to generate changelog](https://github.com/yt-dlp/yt-dlp/commit/d400e261cf029a3f20d364113b14de973be75404) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [Grub4K](https://github.com/Grub4K) (With fixes in [9344964](https://github.com/yt-dlp/yt-dlp/commit/93449642815a6973a4b09b289982ca7e1f961b5f)) + +### 2023.02.17 + +* Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e) +* Fix `--concat-playlist` +* Imply `--no-progress` when `--print` +* Improve default subtitle language selection by [sdht0](https://github.com/sdht0) +* Make `title` completely non-fatal +* Sanitize formats before sorting by [pukkandan](https://github.com/pukkandan) +* Support module level `__bool__` and `property` +* [dependencies] Standardize `Cryptodome` imports +* [hls] Allow extractors to provide AES key by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [ExtractAudio] Handle outtmpl without ext by [carusocr](https://github.com/carusocr) +* [extractor/common] Fix `_search_nuxt_data` by [LowSuggestion912](https://github.com/LowSuggestion912) +* [extractor/generic] Avoid catastrophic backtracking in KVS regex by [bashonly](https://github.com/bashonly) +* [jsinterp] Support `if` 
statements +* [plugins] Fix zip search paths +* [utils] `traverse_obj`: Various improvements by [Grub4K](https://github.com/Grub4K) +* [utils] `traverse_obj`: Fix more bugs +* [utils] `traverse_obj`: Fix several behavioral problems by [Grub4K](https://github.com/Grub4K) +* [utils] Don't use Content-length with encoding by [felixonmars](https://github.com/felixonmars) +* [utils] Fix `time_seconds` to use the provided TZ by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) +* [utils] Fix race condition in `make_dir` by [aionescu](https://github.com/aionescu) +* [utils] Use local kernel32 for file locking on Windows by [Grub4K](https://github.com/Grub4K) +* [compat_utils] Improve `passthrough_module` +* [compat_utils] Simplify `EnhancedModule` +* [build] Update pyinstaller +* [pyinst] Fix for pyinstaller 5.8 +* [devscripts] Provide `pyinstaller` hooks +* [devscripts/pyinstaller] Analyze sub-modules of `Cryptodome` +* [cleanup] Misc fixes and cleanup +* [extractor/anchorfm] Add episode extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/boxcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/ebay] Add extractor by [JChris246](https://github.com/JChris246) +* [extractor/hypergryph] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/NZOnScreen] Add extractor by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) +* [extractor/rozhlas] Add extractor RozhlasVltavaIE by [amra](https://github.com/amra) +* [extractor/tempo] Add IVXPlayer extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai) +* [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu) +* [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [dirkf](https://github.com/dirkf) +* [extractor/youtube] **Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) +* [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator) +* [extractor/youtube] Handle `consent.youtube` +* [extractor/youtube] Support `/live/` URL +* [extractor/youtube] Update invidious and piped instances by [rohieb](https://github.com/rohieb) +* [extractor/91porn] Fix title and comment extraction by [pmitchell86](https://github.com/pmitchell86) +* [extractor/AbemaTV] Cache user token whenever appropriate by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/bfmtv] Support `rmc` prefix by [carusocr](https://github.com/carusocr) +* [extractor/biliintl] Add intro and ending chapters by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/clyp] Support `wav` by [qulaz](https://github.com/qulaz) +* [extractor/crunchyroll] Add intro chapter by [ByteDream](https://github.com/ByteDream) +* [extractor/crunchyroll] Better message for premium videos +* [extractor/crunchyroll] Fix incorrect premium-only error by [Grub4K](https://github.com/Grub4K) +* [extractor/DouyuTV] Use new API by [hatienl0i261299](https://github.com/hatienl0i261299) +* [extractor/embedly] Embedded links may be for other extractors +* [extractor/freesound] Workaround invalid URL in webpage by [rebane2001](https://github.com/rebane2001) 
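+
+As a reference for the `traverse_obj` entries above, a minimal usage sketch (the `data` dict is made up for illustration):
+
+```python
+from yt_dlp.utils import traverse_obj
+
+data = {'formats': [{'url': 'https://example.com/v.mp4', 'height': 720},
+                    {'height': 1080}]}
+
+# Walk a path of keys/indices; missing keys yield the default instead of raising
+traverse_obj(data, ('formats', 0, 'url'))                 # 'https://example.com/v.mp4'
+traverse_obj(data, ('formats', 1, 'url'), default='n/a')  # 'n/a'
+
+# `...` branches over every item; branching always returns a list
+traverse_obj(data, ('formats', ..., 'height'))            # [720, 1080]
+```
+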
+* [extractor/GoPlay] Use new API by [jeroenj](https://github.com/jeroenj) +* [extractor/Hidive] Fix subtitles and age-restriction by [chexxor](https://github.com/chexxor) +* [extractor/huya] Support HD streams by [felixonmars](https://github.com/felixonmars) +* [extractor/moviepilot] Fix extractor by [panatexxa](https://github.com/panatexxa) +* [extractor/nbc] Fix `NBC` and `NBCStations` extractors by [bashonly](https://github.com/bashonly) +* [extractor/nbc] Fix XML parsing by [bashonly](https://github.com/bashonly) +* [extractor/nebula] Remove broken cookie support by [hheimbuerger](https://github.com/hheimbuerger) +* [extractor/nfl] Add `NFLPlus` extractors by [bashonly](https://github.com/bashonly) +* [extractor/niconico] Add support for like history by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan) +* [extractor/nitter] Update instance list by [OIRNOIR](https://github.com/OIRNOIR) +* [extractor/npo] Fix extractor and add HD support by [seproDev](https://github.com/seproDev) +* [extractor/odkmedia] Add `OnDemandChinaEpisodeIE` by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) +* [extractor/pornez] Handle relative URLs in iframe by [JChris246](https://github.com/JChris246) +* [extractor/radiko] Fix format sorting for Time Free by [road-master](https://github.com/road-master) +* [extractor/rcs] Fix extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +* [extractor/reddit] Support user posts by [OMEGARAZER](https://github.com/OMEGARAZER) +* [extractor/rumble] Fix format sorting by [pukkandan](https://github.com/pukkandan) +* [extractor/servus] Rewrite extractor by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +* [extractor/slideslive] Fix slides and chapters/duration by [bashonly](https://github.com/bashonly) +* [extractor/SportDeutschland] Fix extractor by [FriedrichRehren](https://github.com/FriedrichRehren) +* [extractor/Stripchat] Fix extractor by [JChris246](https://github.com/JChris246), [bashonly](https://github.com/bashonly) +* [extractor/tnaflix] Fix extractor by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) +* [extractor/tvp] Support `stream.tvp.pl` by [selfisekai](https://github.com/selfisekai) +* [extractor/twitter] Fix `--no-playlist` and add media `view_count` when using GraphQL by [Grub4K](https://github.com/Grub4K) +* [extractor/twitter] Fix graphql extraction on some tweets by [selfisekai](https://github.com/selfisekai) +* [extractor/vimeo] Fix `playerConfig` extraction by [LeoniePhiline](https://github.com/LeoniePhiline), [bashonly](https://github.com/bashonly) +* [extractor/viu] Add `ViuOTTIndonesiaIE` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vk] Fix playlists for new API by [the-marenga](https://github.com/the-marenga) +* [extractor/vlive] Replace with `VLiveWebArchiveIE` by [seproDev](https://github.com/seproDev) +* [extractor/ximalaya] Update album `_VALID_URL` by [carusocr](https://github.com/carusocr) +* [extractor/zdf] Use android API endpoint for UHD downloads by [seproDev](https://github.com/seproDev) +* [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly) + + +### 2023.01.06 + +* Fix config locations by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), 
[pukkandan](https://github.com/pukkandan) +* [downloader/aria2c] Disable native progress +* [utils] `mimetype2ext`: `weba` is not standard +* [utils] `windows_enable_vt_mode`: Better error handling +* [build] Add minimal `pyproject.toml` +* [update] Fix updater file removal on Windows by [Grub4K](https://github.com/Grub4K) +* [cleanup] Misc fixes and cleanup +* [extractor/aitube] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/drtv] Add series extractors by [FrederikNS](https://github.com/FrederikNS) +* [extractor/volejtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/xanimu] Add extractor by [JChris246](https://github.com/JChris246) +* [extractor/youtube] Retry manifest refresh for live-from-start by [mzhou](https://github.com/mzhou) +* [extractor/biliintl] Add `/media` to `VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/biliIntl] Add fallback to `video_data` by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/crunchyroll:show] Add `language` to entries by [Chrissi2812](https://github.com/Chrissi2812) +* [extractor/joj] Fix extractor by [OndrejBakan](https://github.com/OndrejBakan), [pukkandan](https://github.com/pukkandan) +* [extractor/nbc] Update graphql query by [jacobtruman](https://github.com/jacobtruman) +* [extractor/reddit] Add subreddit as `channel_id` by [gschizas](https://github.com/gschizas) +* [extractor/tiktok] Add `TikTokLive` extractor by [JC-Chung](https://github.com/JC-Chung) + +### 2023.01.02 + +* **Improve plugin architecture** by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) + * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.) and can be distributed and installed as packages. See [the readme](https://github.com/yt-dlp/yt-dlp/tree/05997b6e98e638d97d409c65bb5eb86da68f3b64#plugins) for more information +* Add `--compat-options 2021,2022` + * This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, put `--compat-options 2022` in your config to guard against future compat changes.
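+
+A hedged example of the compat guard described above, for scripts that drive the CLI (assumes a `yt-dlp` executable on `PATH`; the URL is a placeholder):
+
+```python
+import subprocess
+
+# Pin behaviour to the 2022 defaults so future breaking changes don't apply
+subprocess.run(['yt-dlp', '--compat-options', '2022', 'https://video.example.com/123'], check=True)
+```
+
+And a rough sketch of what the improved plugin architecture loads (the site, pattern and class name are hypothetical; the `yt_dlp_plugins.extractor` package location and the `IE` name suffix are the conventions yt-dlp discovers):
+
+```python
+# yt_dlp_plugins/extractor/sample.py, importable from a plugin search
+# path or installed as a `yt_dlp_plugins` namespace package
+from yt_dlp.extractor.common import InfoExtractor
+
+
+class SamplePluginIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.example\.com/(?P<id>\d+)'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': self._og_search_video_url(webpage),
+        }
+```
+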
+* [downloader/aria2c] Native progress for aria2c via RPC by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) +* Merge youtube-dl: Upto [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f6) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) +* Add pre-processor stage `video` +* Let `--parse/replace-in-metadata` run at any post-processing stage +* Add `--enable-file-urls` by [coletdjnz](https://github.com/coletdjnz) +* Add new field `aspect_ratio` +* Add `ac4` to known codecs +* Add `weba` to known extensions +* [FFmpegVideoConvertor] Add `gif` to `--recode-video` +* Add message when there are no subtitles/thumbnails +* Deprioritize HEVC-over-FLV formats by [Lesmiscore](https://github.com/Lesmiscore) +* Make early reject of `--match-filter` stricter +* Fix `--cookies-from-browser` CLI parsing +* Fix `original_url` in playlists +* Fix bug in writing playlist info-json +* Fix bugs in `PlaylistEntries` +* [downloader/ffmpeg] Fix headers for video+audio formats by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor] Add a way to distinguish IEs that returns only videos +* [extractor] Implement universal format sorting and deprecate `_sort_formats` +* [extractor] Let `_extract_format` functions obey `--ignore-no-formats` +* [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/generic] Decode unicode-escaped embed URLs by [bashonly](https://github.com/bashonly) +* [extractor/generic] Don't report redirect to https +* [extractor/generic] Fix JSON LD manifest extraction by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/generic] Use `Accept-Encoding: identity` for initial request by [coletdjnz](https://github.com/coletdjnz) +* [FormatSort] Add `mov` to `vext` +* [jsinterp] Escape regex that looks like nested set +* [webvtt] Handle premature EOF by [flashdagger](https://github.com/flashdagger) +* [utils] `classproperty`: Add cache support +* [utils] `get_exe_version`: Detect broken executables by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) +* [utils] `js_to_json`: Fix bug in [f55523c](https://github.com/yt-dlp/yt-dlp/commit/f55523c) by [ChillingPepper](https://github.com/ChillingPepper), [pukkandan](https://github.com/pukkandan) +* [utils] Make `ExtractorError` mutable +* [utils] Move `FileDownloader.parse_bytes` into utils +* [utils] Move format sorting code into `utils` +* [utils] `windows_enable_vt_mode`: Proper implementation by [Grub4K](https://github.com/Grub4K) +* [update] Workaround [#5632](https://github.com/yt-dlp/yt-dlp/issues/5632) +* [docs] Improvements +* [cleanup] Misc fixes and cleanup +* [cleanup] Use `random.choices` by [freezboltz](https://github.com/freezboltz) +* [extractor/airtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/amazonminitv] Add extractors by [GautamMKGarg](https://github.com/GautamMKGarg), [nyuszika7h](https://github.com/nyuszika7h) +* [extractor/beatbump] Add extractors by [Bobscorn](https://github.com/Bobscorn), [pukkandan](https://github.com/pukkandan) +* [extractor/europarl] Add EuroParlWebstream extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/kanal2] Add extractor by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc), 
[pukkandan](https://github.com/pukkandan) +* [extractor/kankanews] Add extractor by [synthpop123](https://github.com/synthpop123) +* [extractor/kick] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/mediastream] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0) +* [extractor/noice] Add NoicePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/oneplace] Add OnePlacePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/rumble] Add RumbleIE extractor by [flashdagger](https://github.com/flashdagger) +* [extractor/screencastify] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/trtcocuk] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/Veoh] Add user extractor by [tntmod54321](https://github.com/tntmod54321) +* [extractor/videoken] Add extractors by [bashonly](https://github.com/bashonly) +* [extractor/webcamerapl] Add extractor by [milkknife](https://github.com/milkknife) +* [extractor/amazon] Add `AmazonReviews` extractor by [bashonly](https://github.com/bashonly) +* [extractor/netverse] Add `NetverseSearch` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vimeo] Add `VimeoProIE` by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/xiami] Remove extractors by [synthpop123](https://github.com/synthpop123) +* [extractor/youtube] Add `piped.video` by [Bnyro](https://github.com/Bnyro) +* [extractor/youtube] Consider language in format de-duplication +* [extractor/youtube] Extract DRC formats +* [extractor/youtube] Fix `ytuser:` +* [extractor/youtube] Fix bug in handling of music URLs +* [extractor/youtube] Subtitles cannot be translated to `und` +* [extractor/youtube:tab] Extract metadata from channel items by [coletdjnz](https://github.com/coletdjnz) +* [extractor/ARD] Add vtt subtitles by [CapacitorSet](https://github.com/CapacitorSet) +* [extractor/ArteTV] Extract chapters by [bashonly](https://github.com/bashonly), [iw0nderhow](https://github.com/iw0nderhow) +* [extractor/bandcamp] Add `album_artist` by [stelcodes](https://github.com/stelcodes) +* [extractor/bilibili] Fix `--no-playlist` for anthology +* [extractor/bilibili] Improve `_VALID_URL` by [skbeh](https://github.com/skbeh) +* [extractor/biliintl:series] Make partial download of series faster +* [extractor/BiliLive] Fix extractor +* [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction +* [extractor/cda] Support premium and misc improvements by [selfisekai](https://github.com/selfisekai) +* [extractor/ciscowebex] Support password-protected videos by [damianoamatruda](https://github.com/damianoamatruda) +* [extractor/curiositystream] Fix auth by [mnn](https://github.com/mnn) +* [extractor/embedly] Handle vimeo embeds +* [extractor/fifa] Fix Preplay extraction by [dirkf](https://github.com/dirkf) +* [extractor/foxsports] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/gronkh] Fix `_VALID_URL` by [muddi900](https://github.com/muddi900) +* [extractor/hotstar] Improve format metadata +* [extractor/iqiyi] Fix `Iq` JS regex by [bashonly](https://github.com/bashonly) +* [extractor/la7] Improve extractor by [nixxo](https://github.com/nixxo) +* [extractor/mediaset] Better embed detection and error messages by [nixxo](https://github.com/nixxo) +* [extractor/mixch] Support `--wait-for-video` +* [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` by 
[bashonly](https://github.com/bashonly) +* [extractor/naver] Treat fan subtitles as separate language +* [extractor/netverse] Extract comments by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/nosnl] Add support for /video by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/odnoklassniki] Extract subtitles by [bashonly](https://github.com/bashonly) +* [extractor/pinterest] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/plutotv] Fix videos with non-zero start by [digitall](https://github.com/digitall) +* [extractor/polskieradio] Adapt to next.js redesigns by [selfisekai](https://github.com/selfisekai) +* [extractor/reddit] Add vcodec to fallback format by [chengzhicn](https://github.com/chengzhicn) +* [extractor/reddit] Extract crossposted media by [bashonly](https://github.com/bashonly) +* [extractor/reddit] Extract video embeds in text posts by [bashonly](https://github.com/bashonly) +* [extractor/rutube] Support private videos by [mexus](https://github.com/mexus) +* [extractor/sibnet] Separate from VKIE +* [extractor/slideslive] Fix extractor by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/slideslive] Support embeds and slides by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/soundcloud] Support user permalink by [nosoop](https://github.com/nosoop) +* [extractor/spankbang] Fix extractor by [JChris246](https://github.com/JChris246) +* [extractor/stv] Detect DRM +* [extractor/swearnet] Fix description bug +* [extractor/tencent] Fix geo-restricted video by [elyse0](https://github.com/elyse0) +* [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` by [bashonly](https://github.com/bashonly) +* [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg by [bashonly](https://github.com/bashonly) +* [extractor/tiktok] Update API hostname by [redraskal](https://github.com/redraskal) +* [extractor/twitcasting] Fix videos with password by [Spicadox](https://github.com/Spicadox), [bashonly](https://github.com/bashonly) +* [extractor/twitter] Heed `--no-playlist` for multi-video tweets by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/twitter] Refresh guest token when expired by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/twitter:spaces] Add `Referer` to m3u8 by [nixxo](https://github.com/nixxo) +* [extractor/udemy] Fix lectures that have no URL and detect DRM +* [extractor/unsupported] Add more URLs +* [extractor/urplay] Support for audio-only formats by [barsnick](https://github.com/barsnick) +* [extractor/wistia] Improve extension detection by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/yle_areena] Support restricted videos by [docbender](https://github.com/docbender) +* [extractor/youku] Fix extractor by [KurtBestor](https://github.com/KurtBestor) +* [extractor/youporn] Fix metadata by [marieell](https://github.com/marieell) +* [extractor/redgifs] Fix bug in [8c188d5](https://github.com/yt-dlp/yt-dlp/commit/8c188d5d09177ed213a05c900d3523867c5897fd) + + +### 2022.11.11 + +* Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128) +* Backport SSL configuration from Python 3.10 by [coletdjnz](https://github.com/coletdjnz) +* Do more processing in `--flat-playlist` +* Fix `--list` options not 
implying `-s` in some cases by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* Fix end time of clips by [cruel-efficiency](https://github.com/cruel-efficiency) +* Fix for `formats=None` +* Write API params in debug head +* [outtmpl] Ensure ASCII in json and add option for Unicode +* [SponsorBlock] Add `type` field, obey `--retry-sleep extractor`, relax duration check for large segments +* [SponsorBlock] **Support `chapter` category** by [ajayyy](https://github.com/ajayyy), [pukkandan](https://github.com/pukkandan) +* [ThumbnailsConvertor] Fix filename escaping by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) +* [ModifyChapters] Handle the entire video being marked for removal +* [embedthumbnail] Fix thumbnail name in mp3 by [How-Bout-No](https://github.com/How-Bout-No) +* [downloader/fragment] HLS download can continue without first fragment +* [cookies] Improve `LenientSimpleCookie` by [Grub4K](https://github.com/Grub4K) +* [jsinterp] Improve separating regex +* [extractor/common] Fix `fatal=False` for `_search_nuxt_data` +* [extractor/common] Improve `_generic_title` +* [extractor/common] Fix `json_ld` type checks by [Grub4K](https://github.com/Grub4K) +* [extractor/generic] Separate embed extraction into own function +* [extractor/generic:quoted-html] Add extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/unsupported] Raise error on known DRM-only sites by [coletdjnz](https://github.com/coletdjnz) +* [utils] `js_to_json`: Improve escape handling by [Grub4K](https://github.com/Grub4K) +* [utils] `strftime_or_none`: Workaround Python bug on Windows +* [utils] `traverse_obj`: Always return list when branching, allow `re.Match` objects by [Grub4K](https://github.com/Grub4K) +* [build, test] Harden workflows' security by [sashashura](https://github.com/sashashura) +* [build] `py2exe`: Migrate to freeze API by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan) +* [build] Create `armv7l` and `aarch64` releases by [MrOctopus](https://github.com/MrOctopus), [pukkandan](https://github.com/pukkandan) +* [build] Make Linux binary truly standalone using `conda` by [mlampe](https://github.com/mlampe) +* [build] Replace `set-output` with `GITHUB_OUTPUT` by [Lesmiscore](https://github.com/Lesmiscore) +* [update] Use error code `100` for update errors +* [compat] Fix `shutil.move` in restricted ACL mode on BSD by [ClosedPort22](https://github.com/ClosedPort22), [pukkandan](https://github.com/pukkandan) +* [docs, devscripts] Document `pyinst`'s argument passthrough by [jahway603](https://github.com/jahway603) +* [test] Allow `extract_flat` in download tests by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [Alienmaster](https://github.com/Alienmaster) +* [extractor/aeon] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/agora] Add extractors by [selfisekai](https://github.com/selfisekai) +* [extractor/camsoda] Add extractor by [zulaport](https://github.com/zulaport) +* [extractor/cinetecamilano] Add extractor by [timendum](https://github.com/timendum) +* [extractor/deuxm] Add extractors by [CrankDatSouljaBoy](https://github.com/CrankDatSouljaBoy) +* [extractor/genius] Add extractors by [bashonly](https://github.com/bashonly) +* [extractor/japandiet] Add extractors by
[Lesmiscore](https://github.com/Lesmiscore) +* [extractor/listennotes] Add extractor by [lksj](https://github.com/lksj), [pukkandan](https://github.com/pukkandan) +* [extractor/nos.nl] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/oftv] Add extractors by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/podbayfm] Add extractor by [schnusch](https://github.com/schnusch) +* [extractor/qingting] Add extractor by [bashonly](https://github.com/bashonly), [changren-wcr](https://github.com/changren-wcr) +* [extractor/screen9] Add extractor by [tpikonen](https://github.com/tpikonen) +* [extractor/swearnet] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/YleAreena] Add extractor by [pukkandan](https://github.com/pukkandan), [vitkhab](https://github.com/vitkhab) +* [extractor/zeenews] Add extractor by [m4tu4g](https://github.com/m4tu4g), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube:tab] **Update tab handling for redesign** by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) + * Channel URLs download all uploads of the channel as multiple playlists, separated by tab +* [extractor/youtube] Differentiate between no comments and disabled comments by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Extract `concurrent_view_count` for livestreams by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Fix `duration` for premieres by [nosoop](https://github.com/nosoop) +* [extractor/youtube] Fix `live_status` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube] Ignore incomplete data error for comment replies by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Improve chapter parsing from description +* [extractor/youtube] Mark videos as fully watched by [bsun0000](https://github.com/bsun0000) +* [extractor/youtube] Update piped instances by [Generator](https://github.com/Generator) +* [extractor/youtube] Update playlist metadata extraction for new layout by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube:tab] Fix video metadata from tabs by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube:tab] Let `approximate_date` return timestamp +* [extractor/americastestkitchen] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/bbc] Support onion domains by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/bilibili] Add chapters and misc cleanup by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan) +* [extractor/bilibili] Fix BilibiliIE and Bangumi extractors by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan) +* [extractor/bitchute] Better error for geo-restricted videos by [flashdagger](https://github.com/flashdagger) +* [extractor/bitchute] Improve `BitChuteChannelIE` by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) +* [extractor/bitchute] Simplify extractor by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) +* [extractor/cda] Support login through API by [selfisekai](https://github.com/selfisekai) +* [extractor/crunchyroll] Beta is now the only layout by [tejing1](https://github.com/tejing1) +* [extractor/detik] Avoid unnecessary extraction +* [extractor/doodstream] Remove extractor +* [extractor/dplay] Add MotorTrendOnDemand extractor by 
[bashonly](https://github.com/bashonly) +* [extractor/epoch] Support videos without data-trailer by [gibson042](https://github.com/gibson042), [pukkandan](https://github.com/pukkandan) +* [extractor/fox] Extract thumbnail by [vitkhab](https://github.com/vitkhab) +* [extractor/foxnews] Add `FoxNewsVideo` extractor +* [extractor/hotstar] Add season support by [m4tu4g](https://github.com/m4tu4g) +* [extractor/hotstar] Refactor v1 API calls +* [extractor/iprima] Make json+ld non-fatal by [bashonly](https://github.com/bashonly) +* [extractor/iq] Increase phantomjs timeout +* [extractor/kaltura] Support playlists by [jwoglom](https://github.com/jwoglom), [pukkandan](https://github.com/pukkandan) +* [extractor/lbry] Authenticate with cookies by [flashdagger](https://github.com/flashdagger) +* [extractor/livestreamfails] Support posts by [invertico](https://github.com/invertico) +* [extractor/mlb] Add `MLBArticle` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/mxplayer] Improve extractor by [m4tu4g](https://github.com/m4tu4g) +* [extractor/niconico] Always use HTTPS for requests +* [extractor/nzherald] Support new video embed by [coletdjnz](https://github.com/coletdjnz) +* [extractor/odnoklassniki] Support boosty.to embeds by [Lesmiscore](https://github.com/Lesmiscore), [megapro17](https://github.com/megapro17), [pukkandan](https://github.com/pukkandan) +* [extractor/paramountplus] Update API token by [bashonly](https://github.com/bashonly) +* [extractor/reddit] Add fallback format by [bashonly](https://github.com/bashonly) +* [extractor/redgifs] Fix extractors by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/redgifs] Refresh auth token for 401 by [endotronic](https://github.com/endotronic), [pukkandan](https://github.com/pukkandan) +* [extractor/rumble] Add HLS formats and extract more metadata by [flashdagger](https://github.com/flashdagger) +* [extractor/sbs] Improve `_VALID_URL` by [bashonly](https://github.com/bashonly) +* [extractor/skyit] Fix extractors by [nixxo](https://github.com/nixxo) +* [extractor/stripchat] Fix hostname for HLS stream by [zulaport](https://github.com/zulaport) +* [extractor/stripchat] Improve error message by [freezboltz](https://github.com/freezboltz) +* [extractor/telegram] Add playlist support and more metadata by [bashonly](https://github.com/bashonly), [bsun0000](https://github.com/bsun0000) +* [extractor/Tnaflix] Fix for HTTP 500 by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan) +* [extractor/tubitv] Better DRM detection by [bashonly](https://github.com/bashonly) +* [extractor/tvp] Update extractors by [selfisekai](https://github.com/selfisekai) +* [extractor/twitcasting] Fix `data-movie-playlist` extraction by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/twitter] Add onion site to `_VALID_URL` by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/twitter] Add Spaces extractor and GraphQL API by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +* [extractor/twitter] Support multi-video posts by [Grub4K](https://github.com/Grub4K) +* [extractor/uktvplay] Fix `_VALID_URL` +* [extractor/viu] Support subtitles of on-screen text by [tkgmomosheep](https://github.com/tkgmomosheep) +* [extractor/VK] Fix playlist URLs by [the-marenga](https://github.com/the-marenga) +* [extractor/vlive] Extract `release_timestamp` +* [extractor/voot] 
Improve `_VALID_URL` by [freezboltz](https://github.com/freezboltz) +* [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/YoutubeWebArchive] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz) +* [extractor/zee5] Improve `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g) +* [extractor/zenyandex] Fix extractors by [lksj](https://github.com/lksj), [puc9](https://github.com/puc9), [pukkandan](https://github.com/pukkandan) + + +### 2022.10.04 + +* Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly) +* Allow open ranges for time ranges by [Lesmiscore](https://github.com/Lesmiscore) +* Allow plugin extractors to replace the built-in ones +* Don't download entire video when no matching `--download-sections` +* Fix `--config-location -` +* Improve [5736d79](https://github.com/yt-dlp/yt-dlp/pull/5044/commits/5736d79172c47ff84740d5720467370a560febad) +* Fix for when playlists don't have `webpage_url` +* Support environment variables in `--ffmpeg-location` +* Workaround `libc_ver` not being available on Windows Store version of Python +* [outtmpl] Curly braces to filter keys by [pukkandan](https://github.com/pukkandan) +* [outtmpl] Make `%s` work in strftime format for all systems +* [jsinterp] Workaround operator associativity issue +* [cookies] Let `_get_mac_keyring_password` fail gracefully +* [cookies] Parse cookies leniently by [Grub4K](https://github.com/Grub4K) +* [phantomjs] Fix bug in [587021c](https://github.com/yt-dlp/yt-dlp/commit/587021cd9f717181b44e881941aca3f8d753758b) by [elyse0](https://github.com/elyse0) +* [downloader/aria2c] Fix filename containing leading whitespace by [std-move](https://github.com/std-move) +* [downloader/ism] Support ec-3 codec by [nixxo](https://github.com/nixxo) +* [extractor] Fix `fatal=False` in `RetryManager` +* [extractor] Improve json-ld extraction +* [extractor] Make `_search_json` able to parse lists +* [extractor] Escape `%` in `representation_id` of m3u8 +* [extractor/generic] Pass through referer from json-ld +* [utils] `base_url`: URL paths can contain `&` by [elyse0](https://github.com/elyse0) +* [utils] `js_to_json`: Improve +* [utils] `Popen.run`: Fix default return in binary mode +* [utils] `traverse_obj`: Rewrite, document and add tests by [Grub4K](https://github.com/Grub4K) +* [devscripts] `make_lazy_extractors`: Fix for Docker by [josanabr](https://github.com/josanabr) +* [docs] Misc Improvements +* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [gamer191](https://github.com/gamer191) +* [extractor/24tv.ua] Add extractors by [coletdjnz](https://github.com/coletdjnz) +* [extractor/BerufeTV] Add extractor by [Fabi019](https://github.com/Fabi019) +* [extractor/booyah] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0) +* [extractor/bundesliga] Add extractor by [Fabi019](https://github.com/Fabi019) +* [extractor/GoPlay] Add extractor by [CNugteren](https://github.com/CNugteren), [basrieter](https://github.com/basrieter), [jeroenj](https://github.com/jeroenj) +* [extractor/iltalehti] Add extractor by [tpikonen](https://github.com/tpikonen) +* [extractor/IsraelNationalNews] Add extractor by [Bobscorn](https://github.com/Bobscorn) +* [extractor/mediaworksnzvod] Add extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/MicrosoftEmbed] Add extractor by
[DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/nbc] Add NBCStations extractor by [bashonly](https://github.com/bashonly) +* [extractor/onenewsnz] Add extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/prankcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [columndeeply](https://github.com/columndeeply) +* [extractor/Smotrim] Add extractor by [Lesmiscore](https://github.com/Lesmiscore), [nikita-moor](https://github.com/nikita-moor) +* [extractor/tencent] Add Iflix extractor by [elyse0](https://github.com/elyse0) +* [extractor/unscripted] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/adobepass] Add MSO AlticeOne (Optimum TV) by [CplPwnies](https://github.com/CplPwnies) +* [extractor/youtube] **Download `post_live` videos from start** by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube] Add support for Shorts audio pivot feed by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube] Detect `lazy-load-for-videos` embeds +* [extractor/youtube] Do not warn on duplicate chapters +* [extractor/youtube] Fix video like count extraction by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Support changing extraction language by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube:tab] Improve continuation items extraction +* [extractor/youtube:tab] Support `reporthistory` page +* [extractor/amazonstore] Fix JSON extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/amazonstore] Retry to avoid captcha page by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/animeondemand] Remove extractor by [TokyoBlackHole](https://github.com/TokyoBlackHole) +* [extractor/anvato] Fix extractor and refactor by [bashonly](https://github.com/bashonly) +* [extractor/artetv] Remove duplicate stream urls by [Grub4K](https://github.com/Grub4K) +* [extractor/audioboom] Support direct URLs and refactor by [pukkandan](https://github.com/pukkandan), [tpikonen](https://github.com/tpikonen) +* [extractor/bandcamp] Extract `uploader_url` +* [extractor/bilibili] Add space.bilibili extractors by [lockmatrix](https://github.com/lockmatrix) +* [extractor/BilibiliSpace] Fix extractor and better error message by [lockmatrix](https://github.com/lockmatrix) +* [extractor/BiliIntl] Support uppercase lang in `_VALID_URL` by [coletdjnz](https://github.com/coletdjnz) +* [extractor/BiliIntlSeries] Fix `_VALID_URL` +* [extractor/bongacams] Update `_VALID_URL` by [0xGodspeed](https://github.com/0xGodspeed) +* [extractor/crunchyroll:beta] Improve handling of hardsubs by [Grub4K](https://github.com/Grub4K) +* [extractor/detik] Generalize extractors by [HobbyistDev](https://github.com/HobbyistDev), [coletdjnz](https://github.com/coletdjnz) +* [extractor/dplay:italy] Add default authentication by [Timendum](https://github.com/Timendum) +* [extractor/heise] Fix extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/holodex] Fix `_VALID_URL` by [LiviaMedeiros](https://github.com/LiviaMedeiros) +* [extractor/hrfensehen] Fix extractor by [snapdgn](https://github.com/snapdgn) +* [extractor/hungama] Add subtitle by [GautamMKGarg](https://github.com/GautamMKGarg), [pukkandan](https://github.com/pukkandan) +* [extractor/instagram] Extract more metadata by [pritam20ps05](https://github.com/pritam20ps05) +* [extractor/JWPlatform] Fix extractor by 
[coletdjnz](https://github.com/coletdjnz) +* [extractor/malltv] Fix video_id extraction by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/MLBTV] Detect live streams +* [extractor/motorsport] Support native embeds +* [extractor/Mxplayer] Fix extractor by [itachi-19](https://github.com/itachi-19) +* [extractor/nebula] Add nebula.tv by [tannertechnology](https://github.com/tannertechnology) +* [extractor/nfl] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack) +* [extractor/paramountplus] Better DRM detection by [bashonly](https://github.com/bashonly) +* [extractor/patreon] Sort formats +* [extractor/rcs] Fix embed extraction by [coletdjnz](https://github.com/coletdjnz) +* [extractor/redgifs] Fix extractor by [jhwgh1968](https://github.com/jhwgh1968) +* [extractor/rutube] Fix `_EMBED_REGEX` by [coletdjnz](https://github.com/coletdjnz) +* [extractor/RUTV] Fix warnings for livestreams by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/soundcloud:search] More metadata in `--flat-playlist` by [SuperSonicHub1](https://github.com/SuperSonicHub1) +* [extractor/telegraaf] Use mobile GraphQL API endpoint by [coletdjnz](https://github.com/coletdjnz) +* [extractor/tennistv] Fix timestamp by [zenerdi0de](https://github.com/zenerdi0de) +* [extractor/tiktok] Fix TikTokIE by [bashonly](https://github.com/bashonly) +* [extractor/triller] Fix auth token by [bashonly](https://github.com/bashonly) +* [extractor/trovo] Fix extractors by [Mehavoid](https://github.com/Mehavoid) +* [extractor/tv2] Support new url format by [tobi1805](https://github.com/tobi1805) +* [extractor/web.archive:youtube] Fix `_YT_INITIAL_PLAYER_RESPONSE_RE` +* [extractor/wistia] Add support for channels by [coletdjnz](https://github.com/coletdjnz) +* [extractor/wistia] Match IDs in embed URLs by [bashonly](https://github.com/bashonly) +* [extractor/wordpress:playlist] Add generic embed extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/yandexvideopreview] Update `_VALID_URL` by [Grub4K](https://github.com/Grub4K) +* [extractor/zee5] Fix `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g) +* [extractor/zee5] Generate device ids by [freezboltz](https://github.com/freezboltz) + + +### 2022.09.01 + +* Add option `--use-extractors` +* Merge youtube-dl: Upto [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7) +* Add yt-dlp version to infojson +* Fix `--break-per-url --max-downloads` +* Fix bug in `--alias` +* [cookies] Support firefox container in `--cookies-from-browser` by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [downloader/external] Smarter detection of executable +* [extractor/generic] Don't return JW player without formats +* [FormatSort] Fix `aext` for `--prefer-free-formats` +* [jsinterp] Various improvements by [pukkandan](https://github.com/pukkandan), [dirkf](https://github.com/dirkf), [elyse0](https://github.com/elyse0) +* [cache] Mechanism to invalidate old cache +* [utils] Add `deprecation_warning` +* [utils] Add `orderedSet_from_options` +* [utils] `Popen`: Restore `LD_LIBRARY_PATH` when using PyInstaller by [Lesmiscore](https://github.com/Lesmiscore) +* [build] `make tar` should not follow `DESTDIR` by [satan1st](https://github.com/satan1st) +* [build] Update pyinstaller by [shirt-dev](https://github.com/shirt-dev) +* [test] Fix `test_youtube_signature` +* 
[cleanup] Misc fixes and cleanup by [DavidH-2022](https://github.com/DavidH-2022), [MrRawes](https://github.com/MrRawes), [pukkandan](https://github.com/pukkandan) +* [extractor/epoch] Add extractor by [tejasa97](https://github.com/tejasa97) +* [extractor/eurosport] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/IslamChannel] Add extractors by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/newspicks] Add extractor by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/triller] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/VQQ] Add extractors by [elyse0](https://github.com/elyse0) +* [extractor/youtube] Improvements to nsig extraction +* [extractor/youtube] Fix bug in format sorting +* [extractor/youtube] Update iOS Innertube clients by [SamantazFox](https://github.com/SamantazFox) +* [extractor/youtube] Use device-specific user agent by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Add `--compat-option no-youtube-prefer-utc-upload-date` by [coletdjnz](https://github.com/coletdjnz) +* [extractor/arte] Bug fix by [cgrigis](https://github.com/cgrigis) +* [extractor/bilibili] Extract `flac` with premium account by [jackyyf](https://github.com/jackyyf) +* [extractor/BiliBiliSearch] Don't sort by date +* [extractor/BiliBiliSearch] Fix infinite loop +* [extractor/bitchute] Mark errors as expected +* [extractor/crunchyroll:beta] Use anonymous access by [tejing1](https://github.com/tejing1) +* [extractor/huya] Fix stream extraction by [ohaiibuzzle](https://github.com/ohaiibuzzle) +* [extractor/medaltv] Fix extraction by [xenova](https://github.com/xenova) +* [extractor/mediaset] Fix embed extraction +* [extractor/mixcloud] All formats are audio-only +* [extractor/rtbf] Fix jwt extraction by [elyse0](https://github.com/elyse0) +* [extractor/screencastomatic] Support `--video-password` by [shreyasminocha](https://github.com/shreyasminocha) +* [extractor/stripchat] Don't modify input URL by [dfaker](https://github.com/dfaker) +* [extractor/uktv] Improve `_VALID_URL` by [dirkf](https://github.com/dirkf) +* [extractor/vimeo:user] Fix `_VALID_URL` + + +### 2022.08.19 + +* Fix bug in `--download-archive` +* [jsinterp] **Fix for new youtube players** and related improvements by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) +* [phantomjs] Add function to execute JS without a DOM by [MinePlayersPE](https://github.com/MinePlayersPE), [pukkandan](https://github.com/pukkandan) +* [build] Exclude devscripts from installs by [Lesmiscore](https://github.com/Lesmiscore) +* [cleanup] Misc fixes and cleanup +* [extractor/youtube] **Add fallback to phantomjs** for nsig +* [extractor/youtube] Fix error reporting of "Incomplete data" +* [extractor/youtube] Improve format sorting for IOS formats +* [extractor/youtube] Improve signature caching +* [extractor/instagram] Fix extraction by [bashonly](https://github.com/bashonly), [pritam20ps05](https://github.com/pritam20ps05) +* [extractor/rai] Minor fix by [nixxo](https://github.com/nixxo) +* [extractor/rtbf] Fix stream extractor by [elyse0](https://github.com/elyse0) +* [extractor/SovietsCloset] Fix extractor by [ChillingPepper](https://github.com/ChillingPepper) +* [extractor/zattoo] Fix Zattoo resellers by [goggle](https://github.com/goggle) + +### 2022.08.14 + +* Merge youtube-dl: Upto [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56) +* [jsinterp] Handle **new youtube signature functions** +* [jsinterp] Truncate error messages 
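+
+The `[jsinterp]` entries above concern yt-dlp's pure-Python JavaScript interpreter. A minimal sketch with a toy function of the same shape as YouTube's signature routines (`yt_dlp.jsinterp` is an internal module, so treat this as illustrative rather than a stable API):
+
+```python
+from yt_dlp.jsinterp import JSInterpreter
+
+# Signature functions are small string-shuffling routines of this form
+jsi = JSInterpreter('function sig(a){a=a.split("");a.reverse();return a.join("")}')
+print(jsi.call_function('sig', '12345'))  # -> '54321'
+```
+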
+* [extractor] Fix format sorting of `channels` +* [ffmpeg] Disable avconv unless `--prefer-avconv` +* [ffmpeg] Smarter detection of ffprobe filename +* [embedthumbnail] Detect `libatomicparsley.so` +* [ThumbnailsConvertor] Fix conversion after `fixup_webp` +* [utils] Fix `get_compatible_ext` +* [build] Fix changelog +* [update] Set executable bit-mask by [pukkandan](https://github.com/pukkandan), [Lesmiscore](https://github.com/Lesmiscore) +* [devscripts] Fix import +* [docs] Consistent use of `e.g.` by [Lesmiscore](https://github.com/Lesmiscore) +* [cleanup] Misc fixes and cleanup +* [extractor/moview] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/parler] Add extractor by [palewire](https://github.com/palewire) +* [extractor/patreon] Ignore erroneous media attachments by [coletdjnz](https://github.com/coletdjnz) +* [extractor/truth] Add extractor by [palewire](https://github.com/palewire) +* [extractor/aenetworks] Add formats parameter by [jacobtruman](https://github.com/jacobtruman) +* [extractor/crunchyroll] Improve `_VALID_URL`s +* [extractor/doodstream] Add `wf` domain by [aldoridhoni](https://github.com/aldoridhoni) +* [extractor/facebook] Add reel support by [bashonly](https://github.com/bashonly) +* [extractor/MLB] New extractor by [ischmidt20](https://github.com/ischmidt20) +* [extractor/rai] Misc fixes by [nixxo](https://github.com/nixxo) +* [extractor/toggo] Improve `_VALID_URL` by [masta79](https://github.com/masta79) +* [extractor/tubitv] Extract additional formats by [shirt-dev](https://github.com/shirt-dev) +* [extractor/zattoo] Potential fix for resellers + + +### 2022.08.08 + +* **Remove Python 3.6 support** +* Determine merge container better by [pukkandan](https://github.com/pukkandan), [selfisekai](https://github.com/selfisekai) +* Framework for embed detection by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* Merge youtube-dl: Upto [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294) +* `--compat-option no-live-chat` should disable danmaku +* Fix misleading DRM message +* Import ctypes only when necessary +* Minor bugfixes +* Reject entire playlists faster with `--match-filter` +* Remove filtered entries from `-J` +* Standardize retry mechanism +* Validate `--merge-output-format` +* [downloader] Add average speed to final progress line +* [extractor] Add field `audio_channels` +* [extractor] Support multiple archive ids for one video +* [ffmpeg] Set `ffmpeg_location` in a contextvar +* [FFmpegThumbnailsConvertor] Fix conversion from GIF +* [MetadataParser] Don't set `None` when the field didn't match +* [outtmpl] Smarter replacing of unsupported characters +* [outtmpl] Treat empty values as None in filenames +* [utils] sanitize_open: Allow any IO stream as stdout +* [build, devscripts] Add devscript to set a build variant +* [build] Improve build process by [shirt-dev](https://github.com/shirt-dev) +* [build] Update pyinstaller +* [devscripts] Create `utils` and refactor +* [docs] Clarify `best*` +* [docs] Fix bug report issue template +* [docs] Fix capitalization in references by [christoph-heinrich](https://github.com/christoph-heinrich) +* [cleanup, mhtml] Use imghdr +* [cleanup, utils] Consolidate known media extensions +* [cleanup] Misc fixes and cleanup +* [extractor/angel] Add extractor by [AxiosDeminence](https://github.com/AxiosDeminence) +* [extractor/dplay] Add MotorTrend extractor by [Sipherdrakon](https://github.com/Sipherdrakon) +* [extractor/harpodeon] Add 
extractor by [eren-kemer](https://github.com/eren-kemer) +* [extractor/holodex] Add extractor by [pukkandan](https://github.com/pukkandan), [sqrtNOT](https://github.com/sqrtNOT) +* [extractor/kompas] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/rai] Add raisudtirol extractor by [nixxo](https://github.com/nixxo) +* [extractor/tempo] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/youtube] **Fixes for third party client detection** by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Add `live_status=post_live` by [lazypete365](https://github.com/lazypete365) +* [extractor/youtube] Extract more format info +* [extractor/youtube] Parse translated subtitles only when requested +* [extractor/youtube, extractor/twitch] Allow waiting for channels to become live +* [extractor/youtube, webvtt] Extract auto-subs from livestream VODs by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan) +* [extractor/AbemaTVTitle] Implement paging by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/archiveorg] Improve handling of formats by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/arte] Fix title extraction +* [extractor/arte] **Move to v2 API** by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan) +* [extractor/bbc] Fix news articles by [ajj8](https://github.com/ajj8) +* [extractor/camtasia] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/cloudflarestream] Fix video_id padding by [haobinliang](https://github.com/haobinliang) +* [extractor/crunchyroll] Fix conversion of thumbnail from GIF +* [extractor/crunchyroll] Handle missing metadata correctly by [Burve](https://github.com/Burve), [pukkandan](https://github.com/pukkandan) +* [extractor/crunchyroll:beta] Extract timestamp and fix tests by [tejing1](https://github.com/tejing1) +* [extractor/crunchyroll:beta] Use streams API by [tejing1](https://github.com/tejing1) +* [extractor/doodstream] Support more domains by [Galiley](https://github.com/Galiley) +* [extractor/ESPN] Extract duration by [ischmidt20](https://github.com/ischmidt20) +* [extractor/FIFA] Change API endpoint by [Bricio](https://github.com/Bricio), [yashkc2025](https://github.com/yashkc2025) +* [extractor/globo:article] Remove false positives by [Bricio](https://github.com/Bricio) +* [extractor/Go] Extract timestamp by [ischmidt20](https://github.com/ischmidt20) +* [extractor/hidive] Fix cookie login when netrc is also given by [winterbird-code](https://github.com/winterbird-code) +* [extractor/html5] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/ina] Improve extractor by [elyse0](https://github.com/elyse0) +* [extractor/NaverNow] Change endpoint by [ping](https://github.com/ping) +* [extractor/ninegag] Extract uploader by [DjesonPV](https://github.com/DjesonPV) +* [extractor/NovaPlay] Fix extractor by [Bojidarist](https://github.com/Bojidarist) +* [extractor/orf:radio] Rewrite extractors +* [extractor/patreon] Fix and improve extractors by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/rai] Fix RaiNews extraction by [nixxo](https://github.com/nixxo) +* [extractor/redbee] Unify and update extractors by [elyse0](https://github.com/elyse0) +* [extractor/stripchat] Fix _VALID_URL by [freezboltz](https://github.com/freezboltz) 
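+
+For context on the embed-detection framework introduced at the top of this section: extractors can advertise patterns that the framework scans other sites' webpages for. A minimal sketch (the site and pattern are hypothetical; `_EMBED_REGEX` with a named `url` group is the real convention):
+
+```python
+from yt_dlp.extractor.common import InfoExtractor
+
+
+class EmbedCapableIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.example\.com/embed/(?P<id>\w+)'
+    # URLs matched while scanning webpages are handed back to this extractor
+    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://player\.example\.com/embed/\w+)']
+```
+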
+* [extractor/tubi] Exclude playlists from playlist entries by [sqrtNOT](https://github.com/sqrtNOT) +* [extractor/tviplayer] Improve `_VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/twitch] Extract chapters for single chapter VODs by [mpeter50](https://github.com/mpeter50) +* [extractor/vgtv] Support tv.vg.no by [sqrtNOT](https://github.com/sqrtNOT) +* [extractor/vidio] Support embed link by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vk] Fix extractor by [Mehavoid](https://github.com/Mehavoid) +* [extractor/WASDTV:record] Fix `_VALID_URL` +* [extractor/xfileshare] Add Referer by [Galiley](https://github.com/Galiley) +* [extractor/YahooJapanNews] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/yandexmusic] Extract higher quality format +* [extractor/zee5] Update Device ID by [m4tu4g](https://github.com/m4tu4g) + + +### 2022.07.18 + +* Allow users to specify encoding in each config file by [Lesmiscore](https://github.com/Lesmiscore) +* Discard infodict from memory if no longer needed +* Do not allow extractors to return `None` +* Do not load system certificates when `certifi` is used +* Fix rounding of integers in format table +* Improve chapter sanitization +* Skip some fixup if remux/recode is needed by [Lesmiscore](https://github.com/Lesmiscore) +* Support `--no-progress` for `--wait-for-video` +* Fix bug in [612f2be](https://github.com/yt-dlp/yt-dlp/commit/612f2be5d3924540158dfbe5f25d841f04cff8c6) +* [outtmpl] Add alternate form `h` for HTML escaping +* [aes] Add multiple padding modes in CBC by [elyse0](https://github.com/elyse0) +* [extractor/common] Passthrough `errnote=False` to parsers +* [extractor/generic] Remove HEAD request +* [http] Ensure the file handle is always closed +* [ModifyChapters] Modify duration in infodict +* [options] Fix aliases to `--config-location` +* [utils] Fix `get_domain` +* [build] Consistent order for lazy extractors by [lamby](https://github.com/lamby) +* [build] Fix architecture suffix of executables by [odo2063](https://github.com/odo2063) +* [build] Improve `setup.py` +* [update] Do not check `_update_spec` when up to date +* [update] Prepare to remove Python 3.6 support +* [compat] Let PyInstaller detect `_legacy` module +* [devscripts/update-formulae] Do not change dependency section +* [test] Split download tests so they can be more easily run in CI +* [docs] Improve docstring of `download_ranges` by [FirefoxMetzger](https://github.com/FirefoxMetzger) +* [docs] Improve issue templates +* [build] Fix bug in [6d916fe](https://github.com/yt-dlp/yt-dlp/commit/6d916fe709a38e8c4c69b73843acf170b5165931) +* [cleanup, utils] Refactor `parse_codecs` +* [cleanup] Misc fixes and cleanup +* [extractor/acfun] Add extractors by [lockmatrix](https://github.com/lockmatrix) +* [extractor/Audiodraft] Add extractors by [Ashish0804](https://github.com/Ashish0804), [fstirlitz](https://github.com/fstirlitz) +* [extractor/cellebrite] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/detik] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/hytale] Add extractor by [llamasblade](https://github.com/llamasblade), [pukkandan](https://github.com/pukkandan) +* [extractor/liputan6] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/mocha] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/rtl.lu] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/rtvsl] Add extractor by
[iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan) +* [extractor/StarTrek] Add extractor by [scy](https://github.com/scy) +* [extractor/syvdk] Add extractor by [misaelaguayo](https://github.com/misaelaguayo) +* [extractor/theholetv] Add extractor by [dosy4ev](https://github.com/dosy4ev) +* [extractor/TubeTuGraz] Add extractor by [Ferdi265](https://github.com/Ferdi265), [pukkandan](https://github.com/pukkandan) +* [extractor/tviplayer] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/wetv] Add extractors by [elyse0](https://github.com/elyse0) +* [extractor/wikimedia] Add extractor by [EhtishamSabir](https://github.com/EhtishamSabir), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube] Fix duration check for post-live manifestless mode +* [extractor/youtube] More metadata for storyboards by [ftk](https://github.com/ftk) +* [extractor/bigo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/BiliIntl] Fix subtitle extraction by [MinePlayersPE](https://github.com/MinePlayersPE) +* [extractor/crunchyroll] Improve `_VALID_URL` +* [extractor/fifa] Fix extractor by [ischmidt20](https://github.com/ischmidt20) +* [extractor/instagram] Fix post/story extractors by [pritam20ps05](https://github.com/pritam20ps05), [pukkandan](https://github.com/pukkandan) +* [extractor/iq] Set language correctly for Korean subtitles +* [extractor/MangoTV] Fix subtitle languages +* [extractor/Netverse] Improve playlist extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/philharmoniedeparis] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT) +* [extractor/Trovo] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [extractor/twitch] Support storyboards for VODs by [ftk](https://github.com/ftk) +* [extractor/WatchESPN] Improve `_VALID_URL` by [IONECarter](https://github.com/IONECarter), [dirkf](https://github.com/dirkf) +* [extractor/WSJArticle] Fix video id extraction by [sqrtNOT](https://github.com/sqrtNOT) +* [extractor/Ximalaya] Fix extractors by [lockmatrix](https://github.com/lockmatrix) +* [cleanup, extractor/youtube] Fix tests by [sheerluck](https://github.com/sheerluck) + + +### 2022.06.29 + +* Fix `--downloader native` +* Fix `section_end` of clips +* Fix playlist error handling +* Sanitize `chapters` +* [extractor] Fix `_create_request` when headers is None +* [extractor] Fix empty `BaseURL` in MPD +* [ffmpeg] Write full output to debug on error +* [hls] Warn user when trying to download live HLS +* [options] Fix `parse_known_args` for `--` +* [utils] Fix inconsistent default handling between HTTP and HTTPS requests by [coletdjnz](https://github.com/coletdjnz) +* [build] Draft release until complete +* [build] Fix release tag commit +* [build] Standalone x64 builds for MacOS 10.9 by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +* [update] Ability to set a maximum version for specific variants +* [compat] Fix `compat.WINDOWS_VT_MODE` +* [compat] Remove deprecated functions from core code +* [compat] Remove more functions +* [cleanup, extractor] Reduce direct use of `_downloader` +* [cleanup] Consistent style for file heads +* [cleanup] Fix some typos by [crazymoose77756](https://github.com/crazymoose77756) +* [cleanup] Misc fixes and cleanup +* [extractor/Scrolller] Add extractor by [LunarFang416](https://github.com/LunarFang416) +* [extractor/ViMP] Add playlist extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel) +* [extractor/fuyin] Add 
+* [extractor/livestreamfails] Add extractor by [nomevi](https://github.com/nomevi)
+* [extractor/premiershiprugby] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/steam] Add broadcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/youtube] Mark videos as fully watched by [Brett824](https://github.com/Brett824)
+* [extractor/CWTV] Extract thumbnail by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/ViMP] Add thumbnail and support more sites by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [extractor/dropout] Support cookies and login only as needed by [pingiun](https://github.com/pingiun), [pukkandan](https://github.com/pukkandan)
+* [extractor/ertflix] Improve `_VALID_URL`
+* [extractor/lbry] Use HEAD request for redirect URL by [flashdagger](https://github.com/flashdagger)
+* [extractor/mediaset] Improve `_VALID_URL`
+* [extractor/npr] Implement [e50c350](https://github.com/yt-dlp/yt-dlp/commit/e50c3500b43d80e4492569c4b4523c4379c6fbb2) differently
+* [extractor/tennistv] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [zenerdi0de](https://github.com/zenerdi0de)
+
+### 2022.06.22.1
+
+* [build] Fix updating homebrew formula
+
+### 2022.06.22
+
+* [**Deprecate support for Python 3.6**](https://github.com/yt-dlp/yt-dlp/issues/3764#issuecomment-1154051119)
+* **Add option `--download-sections` to download video partially** (see the API sketch below)
+    * Chapter regex and time ranges are accepted, e.g. `--download-sections *1:10-2:20`
+* Add option `--alias`
+* Add option `--lazy-playlist` to process entries as they are received
+* Add option `--retry-sleep`
+* Add slicing notation to `--playlist-items`
+    * Add support for negative indices and step
+    * Add `-I` as alias for `--playlist-items`
+    * Make `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse` redundant
+* `--config-location -` to provide options interactively
+* [build] Add Linux standalone builds
+* [update] Self-restart after update
+* Merge youtube-dl: Up to [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a9)
+* Add `--no-update`
+* Allow extractors to specify `section_start`/`section_end` for clips
+* Do not print progress to `stderr` with `-q`
+* Ensure pre-processor errors do not block video download
+* Fix `--simulate --max-downloads`
+* Improve error handling of bad config files
+* Return an error code if update fails
+* Fix bug in [3a408f9](https://github.com/yt-dlp/yt-dlp/commit/3a408f9d199127ca2626359e21a866a09ab236b3)
+* [ExtractAudio] Allow conditional conversion
+* [ModifyChapters] Fix repeated removal of small segments
+* [ThumbnailsConvertor] Allow conditional conversion
+* [cookies] Detect profiles for cygwin/BSD by [moench-tegeder](https://github.com/moench-tegeder)
+* [dash] Show fragment count with `--live-from-start` by [flashdagger](https://github.com/flashdagger)
+* [extractor] Add `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor] Add `default` parameter to `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor] Add dev option `--load-pages`
+* [extractor] Handle `json_ld` with multiple `@type`s
+* [extractor] Import `_ALL_CLASSES` lazily
+* [extractor] Recognize `src` attribute from HTML5 media elements by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/generic] Revert [e6ae51c](https://github.com/yt-dlp/yt-dlp/commit/e6ae51c123897927eb3c9899923d8ffd31c7f85d)
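+
+A minimal sketch of `--download-sections` through the Python API, using `download_ranges` with `download_range_func` (the URL is a placeholder):
+
+```python
+import yt_dlp
+from yt_dlp.utils import download_range_func
+
+ydl_opts = {
+    # Download only 1:10-2:20 (70s-140s), like `--download-sections "*1:10-2:20"`
+    'download_ranges': download_range_func(None, [(70, 140)]),
+    # Force keyframes at the cut points so the section boundaries are accurate
+    'force_keyframes_at_cuts': True,
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+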
+* [f4m] Bugfix
+* [ffmpeg] Check version lazily
+* [jsinterp] Some optimizations and refactoring by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [utils] Improve performance using `functools.cache`
+* [utils] Send HTTP/1.1 ALPN extension by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `ExtractorError`: Fix `exc_info`
+* [utils] `ISO3166Utils`: Add `EU` and `AP`
+* [utils] `Popen`: Refactor to use contextmanager
+* [utils] `locked_file`: Fix for PyPy on Windows
+* [update] Expose more functionality to API
+* [update] Use `.git` folder to distinguish `source`/`unknown`
+* [compat] Add `functools.cached_property`
+* [test] Fix `FakeYDL` signatures by [coletdjnz](https://github.com/coletdjnz)
+* [docs] Improvements
+* [cleanup, ExtractAudio] Refactor
+* [cleanup, downloader] Refactor `report_progress`
+* [cleanup, extractor] Refactor `_download_...` methods
+* [cleanup, extractor] Rename `extractors.py` to `_extractors.py`
+* [cleanup, utils] Don't use kwargs for `format_field`
+* [cleanup, build] Refactor
+* [cleanup, docs] Re-indent "Usage and Options" section
+* [cleanup] Deprecate `YoutubeDL.parse_outtmpl`
+* [cleanup] Misc fixes and cleanup by [Lesmiscore](https://github.com/Lesmiscore), [MrRawes](https://github.com/MrRawes), [christoph-heinrich](https://github.com/christoph-heinrich), [flashdagger](https://github.com/flashdagger), [gamer191](https://github.com/gamer191), [kwconder](https://github.com/kwconder), [pukkandan](https://github.com/pukkandan)
+* [extractor/DailyWire] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/fourzerostudio] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/GoogleDrive] Add folder extractor by [evansp](https://github.com/evansp), [pukkandan](https://github.com/pukkandan)
+* [extractor/MirrorCoUK] Add extractor by [LunarFang416](https://github.com/LunarFang416), [pukkandan](https://github.com/pukkandan)
+* [extractor/atscaleconfevent] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [extractor/freetv] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/ixigua] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/kicker.de] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/netverse] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/playsuisse] Add extractor by [pukkandan](https://github.com/pukkandan), [sbor23](https://github.com/sbor23)
+* [extractor/substack] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/youtube] **Support downloading clips**
+* [extractor/youtube] Add `innertube_host` and `innertube_key` extractor args by [coletdjnz](https://github.com/coletdjnz) (see the sketch below)
+* [extractor/youtube] Add warning for PostLiveDvr
+* [extractor/youtube] Bring back `_extract_chapters_from_description`
+* [extractor/youtube] Extract `comment_count` from webpage
+* [extractor/youtube] Fix `:ytnotifications` extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix initial player response extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Fix live chat for videos with content warning by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Make signature extraction non-fatal
+* [extractor/youtube:tab] Detect `videoRenderer` in `_post_thread_continuation_entries`
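+
+A sketch of the new extractor-args through the API's `extractor_args` option, equivalent to `--extractor-args "youtube:innertube_host=..."` (the host shown is only an example value):
+
+```python
+import yt_dlp
+
+ydl_opts = {
+    # Values are lists of strings, keyed by extractor and argument name
+    'extractor_args': {
+        'youtube': {'innertube_host': ['youtubei.googleapis.com']},
+    },
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
+```
+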
+* [extractor/BiliIntl] Fix metadata extraction
+* [extractor/BiliIntl] Fix subtitle extraction by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/FranceCulture] Fix extractor by [aurelg](https://github.com/aurelg), [pukkandan](https://github.com/pukkandan)
+* [extractor/PokemonSoundLibrary] Remove extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/StreamCZ] Fix extractor by [adamanldo](https://github.com/adamanldo), [dirkf](https://github.com/dirkf)
+* [extractor/WatchESPN] Support free videos and BAM_DTC by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/animelab] Remove extractor by [gamer191](https://github.com/gamer191)
+* [extractor/bloomberg] Change playback endpoint by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/ccc] Extract view_count by [vkorablin](https://github.com/vkorablin)
+* [extractor/crunchyroll:beta] Fix extractor after API change by [Burve](https://github.com/Burve), [tejing1](https://github.com/tejing1)
+* [extractor/curiositystream] Get `auth_token` from cookie by [mnn](https://github.com/mnn)
+* [extractor/digitalconcerthall] Fix extractor by [ZhymabekRoman](https://github.com/ZhymabekRoman)
+* [extractor/dropbox] Extract the correct `mountComponent`
+* [extractor/dropout] Login is not mandatory
+* [extractor/duboku] Fix for hostname change by [mozbugbox](https://github.com/mozbugbox)
+* [extractor/espn] Add `WatchESPN` extractor by [ischmidt20](https://github.com/ischmidt20), [pukkandan](https://github.com/pukkandan)
+* [extractor/expressen] Fix extractor by [aejdl](https://github.com/aejdl)
+* [extractor/foxnews] Update embed extraction by [elyse0](https://github.com/elyse0)
+* [extractor/ina] Fix extractor by [elyse0](https://github.com/elyse0)
+* [extractor/iwara:user] Improve paging by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/jwplatform] Look for `data-video-jw-id`
+* [extractor/lbry] Update livestream API by [flashdagger](https://github.com/flashdagger)
+* [extractor/mediaset] Improve `_VALID_URL`
+* [extractor/naver] Add `navernow` extractor by [ping](https://github.com/ping)
+* [extractor/niconico:series] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/npr] Use stream url from json-ld by [r5d](https://github.com/r5d)
+* [extractor/pornhub] Extract `uploader_id` field by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/radiofrance] Add more radios by [bubbleguuum](https://github.com/bubbleguuum)
+* [extractor/rumble] Detect JS embed
+* [extractor/rumble] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [extractor/southpark] Add `southpark.lat` extractor by [darkxex](https://github.com/darkxex)
+* [extractor/spotify:show] Fix extractor
+* [extractor/tiktok] Detect embeds
+* [extractor/tiktok] Extract `SIGI_STATE` by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan), [sulyi](https://github.com/sulyi)
+* [extractor/tver] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/vevo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/yahoo:gyao] Fix extractor
+* [extractor/zattoo] Fix live streams by [miseran](https://github.com/miseran)
+* [extractor/zdf] Improve format sorting by [elyse0](https://github.com/elyse0)
+
+
+### 2022.05.18
+
+* Add support for SSL client certificate authentication by [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf)
+    * Adds `--client-certificate`, `--client-certificate-key`, `--client-certificate-password`
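+
+A minimal sketch of client-certificate authentication through the API; the option names mirror the new flags, and the paths, password and URL are placeholders:
+
+```python
+import yt_dlp
+
+ydl_opts = {
+    'client_certificate': '/path/to/client.crt',
+    'client_certificate_key': '/path/to/client.key',
+    'client_certificate_password': 'hunter2',  # only needed for encrypted keys
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://example.com/protected/video'])
+```
+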
+* Add `--match-filter -` to interactively ask for each video
+* `--max-downloads` should obey `--break-per-input`
+* Allow use of weaker ciphers with `--legacy-server-connect`
+* Don't imply `-s` for later stages of `-O`
+* Fix `--date today`
+* Fix `--skip-unavailable-fragments`
+* Fix color in `-q -F`
+* Fix redirect HTTP method handling by [coletdjnz](https://github.com/coletdjnz)
+* Improve `--clean-infojson`
+* Remove warning for videos with an empty title
+* Run `FFmpegFixupM3u8PP` for live-streams if needed
+* Show name of downloader in verbose log
+* [cookies] Allow `cookiefile` to be a text stream
+* [cookies] Report progress when importing cookies
+* [downloader/ffmpeg] Specify headers for each URL by [elyse0](https://github.com/elyse0)
+* [fragment] Do not change chunk-size when `--test`
+* [fragment] Make single thread download work for `--live-from-start` by [Lesmiscore](https://github.com/Lesmiscore)
+* [hls] Fix `byte_range` for `EXT-X-MAP` fragment by [fstirlitz](https://github.com/fstirlitz)
+* [http] Fix retrying on read timeout by [coletdjnz](https://github.com/coletdjnz)
+* [ffmpeg] Fix features detection
+* [EmbedSubtitle] Enable for more video extensions
+* [EmbedThumbnail] Disable thumbnail conversion for mkv by [evansp](https://github.com/evansp)
+* [EmbedThumbnail] Do not obey `-k`
+* [EmbedThumbnail] Do not remove id3v1 tags
+* [FFmpegMetadata] Remove `\0` from metadata
+* [FFmpegMetadata] Remove filename from attached info-json
+* [FixupM3u8] Obey `--hls-prefer-mpegts`
+* [SponsorBlock] Don't crash when duration is unknown
+* [XAttrMetadata] Refactor and document dependencies
+* [extractor] Document netrc machines
+* [extractor] Update `manifest_url`s after redirect by [elyse0](https://github.com/elyse0)
+* [extractor] Update dash `manifest_url` after redirects by [elyse0](https://github.com/elyse0)
+* [extractor] Use `classmethod`/`property` where possible
+* [generic] Refactor `_extract_rss`
+* [utils] `is_html`: Handle double BOM
+* [utils] `locked_file`: Ignore illegal seek on `truncate` by [jakeogh](https://github.com/jakeogh)
+* [utils] `sanitize_path`: Fix when path is empty string
+* [utils] `write_string`: Workaround newline issue in `conhost`
+* [utils] `certifi`: Make sure the pem file exists
+* [utils] Fix `WebSocketsWrapper`
+* [utils] `locked_file`: Do not give executable bits for newly created files by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] `YoutubeDLCookieJar`: Detect and reject JSON file by [Lesmiscore](https://github.com/Lesmiscore)
+* [test] Convert warnings into errors and fix some existing warnings by [fstirlitz](https://github.com/fstirlitz)
+* [dependencies] Create module with all dependency imports
+* [compat] Split into sub-modules by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [compat] Implement `compat.imghdr`
+* [build] Add `make uninstall` by [MrRawes](https://github.com/MrRawes)
+* [build] Avoid use of `install -D`
+* [build] Fix `Makefile` by [putnam](https://github.com/putnam)
+* [build] Fix `--onedir` on macOS
+* [build] Add more test-runners
+* [cleanup] Deprecate some compat vars by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Remove unused code paths, extractors, scripts and tests by [fstirlitz](https://github.com/fstirlitz)
+* [cleanup] Upgrade syntax (`pyupgrade`) and sort imports (`isort`)
+* [cleanup, docs, build] Misc fixes
+* [BilibiliLive] Add extractor by [HE7086](https://github.com/HE7086), [pukkandan](https://github.com/pukkandan)
+* [Fifa] Add extractor by [Bricio](https://github.com/Bricio)
+* [goodgame] Add extractor by [nevack](https://github.com/nevack)
+* [gronkh] Add playlist extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [icareus] Add extractor by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [iwara] Add playlist extractors by [i6t](https://github.com/i6t)
+* [Likee] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [masters] Add extractor by [m4tu4g](https://github.com/m4tu4g)
+* [nebula] Add support for subscriptions by [hheimbuerger](https://github.com/hheimbuerger)
+* [Podchaser] Add extractors by [connercsbn](https://github.com/connercsbn)
+* [rokfin:search] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add `:ytnotifications` extractor by [krichbanana](https://github.com/krichbanana)
+* [youtube] Add YoutubeStoriesIE (`ytstories:<channel UCID>`) by [coletdjnz](https://github.com/coletdjnz)
+* [ZingMp3] Add chart and user extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [adn] Update AES key by [elyse0](https://github.com/elyse0)
+* [adobepass] Allow cookies for authenticating MSO
+* [bandcamp] Exclude merch links by [Yipten](https://github.com/Yipten)
+* [chingari] Fix archiving and tests
+* [DRTV] Improve `_VALID_URL` by [vertan](https://github.com/vertan)
+* [facebook] Improve thumbnail extraction by [Wikidepia](https://github.com/Wikidepia)
+* [fc2] Stop heartbeating once FFmpeg finishes by [Lesmiscore](https://github.com/Lesmiscore)
+* [Gofile] Fix extraction and support password-protected links by [mehq](https://github.com/mehq)
+* [hotstar, cleanup] Refactor extractors
+* [InfoQ] Don't fail on missing audio format by [evansp](https://github.com/evansp)
+* [Jamendo] Extract more metadata by [evansp](https://github.com/evansp)
+* [kaltura] Update API calls by [flashdagger](https://github.com/flashdagger)
+* [KhanAcademy] Fix extractor by [rand-net](https://github.com/rand-net)
+* [LCI] Fix extractor by [MarwenDallel](https://github.com/MarwenDallel)
+* [lrt] Support livestreams by [GiedriusS](https://github.com/GiedriusS)
+* [niconico] Set `expected_protocol` to a public field
+* [Niconico] Support 2FA by [ekangmonyet](https://github.com/ekangmonyet)
+* [Olympics] Fix format extension
+* [openrec:movie] Enable fallback for /movie/ URLs
+* [PearVideo] Add fallback for formats by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [radiko] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add `release_year`
+* [reddit] Prevent infinite loop
+* [rokfin] Implement login by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruutu] Support hs.fi embeds by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [spotify] Detect iframe embeds by [fstirlitz](https://github.com/fstirlitz)
+* [telegram] Fix metadata extraction
+* [tmz, cleanup] Update tests by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [toggo] Fix `_VALID_URL` by [ca-za](https://github.com/ca-za)
+* [trovo] Update to new API by [nyuszika7h](https://github.com/nyuszika7h)
+* [TVer] Improve extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Pass headers for each format by [Lesmiscore](https://github.com/Lesmiscore)
+* [VideocampusSachsen] Improve extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [vimeo] Fix extractors
+* [wat] Fix extraction of multi-language videos and subtitles by [elyse0](https://github.com/elyse0)
+* [wistia] Fix `_VALID_URL` by [dirkf](https://github.com/dirkf)
+* [youtube, cleanup] Minor refactoring by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add piped instance URLs by [JordanWeatherby](https://github.com/JordanWeatherby)
+* [youtube] Deprioritize auto-generated thumbnails
+* [youtube] Deprioritize format 22 (often damaged)
+* [youtube] Fix episode metadata extraction
+* [zee5] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [zingmp3, cleanup] Refactor extractors
+
+
+### 2022.04.08
+
+* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
+* Treat multiple `--match-filters` as OR
+* File locking improvements:
+    * Do not lock downloading file on Windows
+    * Do not prevent download if locking is unsupported
+    * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan)
+    * Fix non-blocking non-exclusive lock
+* De-prioritize automatic-subtitles when no `--sub-lang` is given
+* Exit after `--dump-user-agent`
+* Fallback to video-only format when selecting by extension
+* Fix `--abort-on-error` for subtitles
+* Fix `--no-overwrite` for playlist infojson
+* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger)
+* Fix `--sleep-interval`
+* Fix `--throttled-rate`
+* Fix `autonumber`
+* Fix case of `http_headers`
+* Fix filepath sanitization in `--print-to-file`
+* Handle float in `--wait-for-video`
+* Ignore `mhtml` formats from `-f mergeall`
+* Ignore format-specific fields in initial pass of `--match-filter`
+* Protect stdout from unexpected progress and console-title
+* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz)
+* Remove incorrect warning for `--dateafter`
+* Show warning when all media formats have DRM
+* [downloader] Fix invocation of `HttpieFD`
+* [http] Fix #3215
+* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
+* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore)
+* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz)
+* [mhtml] Fix fragments with absolute URLs by [coletdjnz](https://github.com/coletdjnz)
+* [extractor] Add `_perform_login` function
+* [extractor] Allow control characters inside JSON
+* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz)
+* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore)
+* [ffmpeg] Cache version data
+* [FFmpegConcat] Ensure final directory exists
+* [FFmpegMetadata] Write id3v1 tags
+* [FFmpegVideoConvertor] Add more formats to `--remux-video`
+* [FFmpegVideoConvertor] Ensure all streams are copied
+* [MetadataParser] Validate outtmpl early
+* [outtmpl] Fix replacement/default when used with alternate
+* [outtmpl] Limit changes during sanitization
+* [phantomjs] Fix bug
+* [test] Add `test_locked_file`
+* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h)
+* [utils] `traverse_obj`: Allow filtering by value (see the sketch below)
+* [utils] Add `filter_dict`, `get_first`, `try_call`
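+
+A sketch of `traverse_obj` value filtering as added here: a callable key keeps only the branches for which it returns true (the data structure is made up):
+
+```python
+from yt_dlp.utils import traverse_obj
+
+data = {'formats': [
+    {'format_id': 'a', 'height': 720},
+    {'format_id': 'b', 'height': None},
+]}
+# Collect the ids of formats that actually carry a height
+print(traverse_obj(data, ('formats', lambda _, v: v['height'], 'format_id')))
+# -> ['a']
+```
+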
+* [utils] `ExtractorError`: Fix for older Python versions
+* [utils] `WebSocketsWrapper`: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore)
+* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz)
+* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes)
+* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli)
+* [docs] Remove readthedocs
+* [build] Add `requirements.txt` to pip distributions
+* [cleanup, postprocessor] Create `_download_json`
+* [cleanup, vimeo] Fix tests
+* [cleanup] Misc fixes and minor cleanup
+* [cleanup] Use `_html_extract_title`
+* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [arte] Add `format_note` to m3u8 formats
+* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte)
+* [BanBye] Add extractor by [mehq](https://github.com/mehq)
+* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69)
+* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio)
+* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon)
+* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon)
+* [Jable] Add extractors by [mehq](https://github.com/mehq)
+* [LastFM] Add extractors by [mehq](https://github.com/mehq)
+* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa)
+* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark)
+* [PokemonSoundLibrary] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299)
+* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [afreecatv] Match new VOD URL by [wlritchi](https://github.com/wlritchi)
+* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle)
+* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi)
+* [BRMediathek] Fix `_VALID_URL`
+* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1)
+* [crunchyroll] Fix inheritance
+* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb)
+* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [ellentube] Extract subtitles from manifest
+* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan)
+* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz)
+* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
+* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [Nebula] Fix bug in [52efa4b](https://github.com/yt-dlp/yt-dlp/commit/52efa4b31200119adaa8acf33e50b84fcb6948f0)
+* [niconico] Fix extraction of thumbnails and uploader (#3266)
+* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore)
+* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk)
+* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz)
+* [ParamountPlus, CBS] Change `_VALID_URL` by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian)
+* [Piapro] Extract description with line breaks by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Fix extraction of HTTP formats by [nixxo](https://github.com/nixxo)
+* [rumble] Unescape title
+* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore)
+* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen)
+* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui)
+* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [TikTokVM] Fix redirect to user URL
+* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [TVer] Support landing page by [vvto33](https://github.com/vvto33)
+* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore)
+* [veo] Fix `_VALID_URL`
+* [Veo] Fix extractor by [i6t](https://github.com/i6t)
+* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h)
+* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29)
+* [viu] Fix extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
+* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz)
+* [wget] Fix proxy by [kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz)
+* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0)
+* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add extractor-arg to skip auto-translated subs
+* [youtube] Avoid false positives when detecting damaged formats
+* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev)
+* [youtube] Fix auto-translated automatic captions
+* [youtube] Fix pagination of `membership` tab
+* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Minor improvements
+* [youtube:tab] Return shorts URL if video is a short by [coletdjnz](https://github.com/coletdjnz)
+* [Zattoo] Fix extractors by [goggle](https://github.com/goggle)
+* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299)
+
+
+### 2022.03.08.1
+
+* [cleanup] Refactor `__init__.py`
+* [build] Fix bug
+
+### 2022.03.08
+
+* Merge youtube-dl: Up to [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
+* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1) (see the sketch below)
+* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
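+
+A sketch of the new regex operator (`~=`) and quoting in format filters, passed as a format-selector string through the API; the same string works after `-f` on the CLI, and the pattern is only an example:
+
+```python
+import yt_dlp
+
+ydl_opts = {
+    # Best video whose format_note matches a regex, plus best audio
+    'format': "bv[format_note~='(?i)premium']+ba/b",
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://example.com/video'])
+```
+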
+* Add pre-processor stage `after_filter`
+* Better error message when no `--live-from-start` format
+* Create necessary directories for `--print-to-file`
+* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
+* Fix `-all` for `--sub-langs`
+* Fix doubling of `video_id` in `ExtractorError`
+* Fix for when stdout/stderr encoding is `None`
+* Handle negative duration from extractor
+* Implement `--add-header` without modifying `std_headers` (see the sketch below)
+* Obey `--abort-on-error` for "ffmpeg not installed"
+* Set `webpage_url_...` from `webpage_url` and not input URL
+* Tolerate failure to `--write-link` due to unknown URL
+* [aria2c] Add `--http-accept-gzip=true`
+* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev)
+* [cookies] Update macOS 12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
+* [devscripts] Improve `prepare_manpage`
+* [downloader] Do not use aria2c for non-native `m3u8`
+* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [extractor] Allow `http_headers` to be specified for `thumbnails`
+* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
+* [FFmpegConcat] Abort on `--simulate`
+* [FormatSort] Consider `acodec`=`ogg` as `vorbis`
+* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
+* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [generic] Pass referer to extracted formats
+* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
+* [options] Better ambiguous option resolution
+* [options] Rename `--clean-infojson` to `--clean-info-json`
+* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
+* [SponsorBlock] Minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
+* [utils] Better traceback for `ExtractorError`
+* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
+* [utils] Improve file locking
+* [utils] `OnDemandPagedList`: Do not download pages after error
+* [utils] `render_table`: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
+* [utils] Validate `DateRange` input
+* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
+* [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
+* [cleanup, docs] Misc cleanup
+* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
+* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
+* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
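+
+A minimal sketch of per-request headers through the API, matching the reworked `--add-header` that no longer mutates the global `std_headers` (URL and header value are placeholders):
+
+```python
+import yt_dlp
+
+ydl_opts = {
+    # Merged into each request's headers instead of patching std_headers
+    'http_headers': {'Referer': 'https://example.com/'},
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://example.com/video'])
+```
+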
+* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
+* [nfb] Add extractor by [ofkz](https://github.com/ofkz)
+* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
+* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
+* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
+* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
+* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
+* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
+* [ard] Fix `_VALID_URL`
+* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
+* [bandcamp] Detect acodec
+* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
+* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
+* [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
+* [bigo] Fix extractor to not use `form_params`
+* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
+* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
+* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
+* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
+* [cspan] Support C-Span congress videos by [Grabien](https://github.com/Grabien)
+* [dropbox] Fix regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
+* [Gettr] Fix formats order by [i6t](https://github.com/i6t)
+* [Gettr] Improve extractor by [i6t](https://github.com/i6t)
+* [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
+* [lbry] Fix `--ignore-no-formats-error`
+* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
+* [mildom] Fix linter
+* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
+* [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
+* [niconico:tag] Add support for searching tags
+* [nrk] Add fallback API
+* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
+* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
+* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
+* [spiegel] Fix `_VALID_URL`
+* [ThumbnailsConvertor] Support `webp`
+* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
+* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
+* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
+* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
+* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitch] Fix field name of `view_count`
+* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
+* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
+* [youtube:tab] Add `approximate_date` extractor-arg
+* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Reject webpage data if redirected to home page
+* [youtube] De-prioritize potentially damaged formats
+* [youtube] Differentiate descriptive audio by language code
+* [youtube] Ensure subtitle URLs are absolute by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
+* [youtube] Fix automatic captions
+* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [youtube] Further de-prioritize 3gp format
+* [youtube] Label original auto-subs
+* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
+* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
+* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
+* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
+
+
+### 2022.02.04
+
+* [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:search] Add tests
+* [twitcasting] Enforce UTF-8 for POST payload by [Lesmiscore](https://github.com/Lesmiscore)
+* [mediaset] Fix extractor by [nixxo](https://github.com/nixxo)
+* [websocket] Make syntax error in `websockets` module non-fatal
+
+### 2022.02.03
+
+* Merge youtube-dl: Up to [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
+* Add option `--print-to-file`
+* Make nested `--config-locations` relative to parent file
+* Ensure `_type` is present in `info.json`
+* Fix `--compat-options list-formats`
+* Fix/improve `InAdvancePagedList`
+* [downloader/ffmpeg] Handle unknown formats better
+* [outtmpl] Handle `-o ""` better
+* [outtmpl] Handle hard-coded file extension better
+* [extractor] Add convenience function `_yes_playlist`
+* [extractor] Allow non-fatal `title` extraction
+* [extractor] Extract video inside `Article` json_ld
+* [generic] Allow further processing of json_ld URL
+* [cookies] Fix keyring selection for unsupported desktops
+* [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
+* [aes] Add `unpad_pkcs7`
+* [test] Fix `test_youtube_playlist_noplaylist`
+* [docs,cleanup] Misc cleanup
+* [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
+* [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
+* [youtube] Add extractor `YoutubeMusicSearchURLIE`
+* [archive.org] Ignore unnecessary files
+* [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
+* [bilibili] Fix extractor, make anthology title non-fatal
+* [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
+* [cctv] De-prioritize sample format
+* [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
+* [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
+* [doodstream] Fix extractor
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [FFmpegConcat] Abort on `--skip-download` and download errors
+* [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [globo] Fix extractor by [Bricio](https://github.com/Bricio)
+* [glomex] Simplify embed detection
+* [GoogleSearch] Fix extractor
+* [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
+* [MySpass] Fix video URL processing by [trassshhub](https://github.com/trassshhub)
+* [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
+* [orf:tvthek] Lazy playlist extraction and obey `--no-playlist`
+* [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
+* [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
+* [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
+* [Vimm] Add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
+* [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix n-sig for player e06dea74
+* [youtube, cleanup] Misc fixes and cleanup
+
+
+### 2022.01.21
+
+* Add option `--concat-playlist` to **concat videos in a playlist**
+* Allow **multiple and nested configuration files**
+* Add more post-processing stages (`after_video`, `playlist`)
+* Allow `--exec` to be run at any post-processing stage (Deprecates `--exec-before-download`; see the sketch below)
+* Allow `--print` to be run at any post-processing stage
+* Allow listing formats, thumbnails, subtitles using `--print` by [pukkandan](https://github.com/pukkandan), [Zirro](https://github.com/Zirro)
+* Add fields `video_autonumber`, `modified_date`, `modified_timestamp`, `playlist_count`, `channel_follower_count`
+* Add key `requested_downloads` in the root `info_dict`
+* Write `download_archive` only after all formats are downloaded
+* [FFmpegMetadata] Allow setting metadata of individual streams using `meta<n>_` prefix
+* Add option `--legacy-server-connect` by [xtkoba](https://github.com/xtkoba)
+* Allow escaped `,` in `--extractor-args`
+* Allow unicode characters in `info.json`
+* Check for existing thumbnail/subtitle in final directory
+* Don't treat empty containers as `None` in `sanitize_info`
+* Fix `-s --ignore-no-formats --force-write-archive`
+* Fix live title for multiple formats
+* List playlist thumbnails in `--list-thumbnails`
+* Raise error if subtitle download fails
+* [cookies] Fix bug when keyring is unspecified
+* [ffmpeg] Ignore unknown streams, standardize use of `-map 0`
+* [outtmpl] Alternate form for `D` and fix suffix's case
+* [utils] Add `Sec-Fetch-Mode` to `std_headers`
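+
+A sketch (assuming the `Exec` postprocessor key and the `when` stage names exposed by the API) of running a command at a chosen post-processing stage, as `--exec` now allows; the command and URL are placeholders:
+
+```python
+import yt_dlp
+
+ydl_opts = {
+    'postprocessors': [{
+        'key': 'Exec',
+        'exec_cmd': 'echo {}',   # placeholder command; {} expands to the filepath
+        'when': 'playlist',      # e.g. 'pre_process', 'after_move', 'playlist'
+    }],
+}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://example.com/playlist'])
+```
+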
+* [utils] Fix `format_bytes` output for Bytes by [pukkandan](https://github.com/pukkandan), [mdawar](https://github.com/mdawar)
+* [utils] Handle `ss:xxx` in `parse_duration`
+* [utils] Improve parsing for nested HTML elements by [zmousm](https://github.com/zmousm), [pukkandan](https://github.com/pukkandan)
+* [utils] Use key `None` in `traverse_obj` to return as-is
+* [extractor] Detect more subtitle codecs in MPD manifests by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Extract chapters from JSON-LD by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [extractor] Extract thumbnails from JSON-LD by [nixxo](https://github.com/nixxo)
+* [extractor] Improve `url_result` and related
+* [generic] Improve KVS player extraction by [trassshhub](https://github.com/trassshhub)
+* [build] Reduce dependency on third-party workflows
+* [extractor,cleanup] Use `_search_nextjs_data`, `format_field`
+* [cleanup] Minor fixes and cleanup
+* [docs] Improvements
+* [test] Fix TestVerboseOutput
+* [afreecatv] Add livestreams extractor by [wlritchi](https://github.com/wlritchi)
+* [callin] Add extractor by [foghawk](https://github.com/foghawk)
+* [CrowdBunker] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [daftsex] Add extractors by [k3ns1n](https://github.com/k3ns1n)
+* [digitalconcerthall] Add extractor by [teridon](https://github.com/teridon)
+* [Drooble] Add extractor by [u-spec-png](https://github.com/u-spec-png)
+* [EuropeanTour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [iq.com] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [KelbyOne] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [LnkIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [MainStreaming] Add extractor by [coletdjnz](https://github.com/coletdjnz)
+* [megatvcom] Add extractors by [zmousm](https://github.com/zmousm)
+* [Newsy] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [noodlemagazine] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [PokerGo] Add extractors by [Ashish0804](https://github.com/Ashish0804)
+* [Pornez] Add extractor by [mozlima](https://github.com/mozlima)
+* [PRX] Add extractors by [coletdjnz](https://github.com/coletdjnz)
+* [RTNews] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Rule34video] Add extractor by [trassshhub](https://github.com/trassshhub)
+* [tvopengr] Add extractors by [zmousm](https://github.com/zmousm)
+* [Vimm] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
+* [glomex] Add extractors by [zmousm](https://github.com/zmousm)
+* [instagram] Add story/highlight extractor by [u-spec-png](https://github.com/u-spec-png)
+* [openrec] Add movie extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add Raiplaysound extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [aparat] Fix extractor
+* [ard] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [BiliIntl] Add login by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CeskaTelevize] Use `http` for manifests
+* [CTVNewsIE] Add fallback for video search by [Ashish0804](https://github.com/Ashish0804)
+* [dplay] Migrate DiscoveryPlusItaly to DiscoveryPlus by [timendum](https://github.com/timendum)
+* [dplay] Re-structure DiscoveryPlus extractors
+* [Dropbox] Support password-protected files and more formats by [zenerdi0de](https://github.com/zenerdi0de)
+* [facebook] Fix extraction from groups
+* [facebook] Improve title and uploader extraction
+* [facebook] Parse dash manifests
+* [fox] Extract m3u8 from preview by [ischmidt20](https://github.com/ischmidt20)
+* [funk] Support origin URLs
+* [gfycat] Fix `uploader`
+* [gfycat] Support embeds by [coletdjnz](https://github.com/coletdjnz)
+* [hotstar] Add extractor args to ignore tags by [Ashish0804](https://github.com/Ashish0804)
+* [hrfernsehen] Fix ardloader extraction by [CreaValix](https://github.com/CreaValix)
+* [instagram] Fix username extraction for stories and highlights by [nyuszika7h](https://github.com/nyuszika7h)
+* [kakao] Detect geo-restriction
+* [line] Remove `tv.line.me` by [sian1468](https://github.com/sian1468)
+* [mixch] Add `MixchArchiveIE` by [Lesmiscore](https://github.com/Lesmiscore)
+* [mixcloud] Detect restrictions by [llacb47](https://github.com/llacb47)
+* [NBCSports] Fix extraction of platform URLs by [ischmidt20](https://github.com/ischmidt20)
+* [Nexx] Extract more metadata by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Nexx] Support 3q CDN by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [pbs] De-prioritize AD formats
+* [PornHub,YouTube] Refresh onion addresses by [unit193](https://github.com/unit193)
+* [RedBullTV] Parse subtitles from manifest by [Ashish0804](https://github.com/Ashish0804)
+* [streamcz] Fix extractor by [arkamar](https://github.com/arkamar), [pukkandan](https://github.com/pukkandan)
+* [Ted] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [trassshhub](https://github.com/trassshhub)
+* [Theta] Fix valid URL by [alerikaisattera](https://github.com/alerikaisattera)
+* [ThisOldHouseIE] Add support for premium videos by [Ashish0804](https://github.com/Ashish0804)
+* [TikTok] Fix extraction for sigi-based webpages, add API fallback by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Pass cookies to formats, and misc fixes by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Extract captions, user thumbnail by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [TikTok] Change app version by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
+* [TVer] Extract message for unaired live by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Refactor extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitter] Fix video in quoted tweets
+* [veoh] Improve extractor by [foghawk](https://github.com/foghawk)
+* [vk] Capture `clip` URLs
+* [vk] Fix VKUserVideosIE by [Ashish0804](https://github.com/Ashish0804)
+* [vk] Improve `_VALID_URL` by [k3ns1n](https://github.com/k3ns1n)
+* [VrtNU] Handle empty title by [pgaig](https://github.com/pgaig)
+* [XVideos] Check HLS formats by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [yahoo:gyao] Improve playlist handling by [hyano](https://github.com/hyano)
+* [youtube:tab] Extract more playlist metadata by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Raise error on tab redirect by [krichbanana](https://github.com/krichbanana), [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Update Innertube clients by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Detect live-stream embeds
+* [youtube] Do not return `upload_date` for playlists
+* [youtube] Extract channel subscriber count by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Make invalid storyboard URL non-fatal
+* [youtube] Enforce UTC, update innertube clients and tests by [coletdjnz](https://github.com/coletdjnz)
+* [zdf] Add chapter extraction by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Add geo-bypass
+
+
+### 2021.12.27
+
+* Avoid recursion error when re-extracting info
+* [ffmpeg] Fix position of `--ppa`
+* [aria2c] Don't show progress when `--no-progress`
+* [cookies] Support other keyrings by [mbway](https://github.com/mbway)
+* [EmbedThumbnail] Prefer AtomicParsley over ffmpeg if available
+* [generic] Fix HTTP KVS Player by [git-anony-mouse](https://github.com/git-anony-mouse)
+* [ThumbnailsConvertor] Fix for when there are no thumbnails
+* [docs] Add examples for using `TYPES:` in `-P`/`-o`
+* [PixivSketch] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [tiktok] Add music, sticker and tag IEs by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [BiliIntl] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CBC] Fix URL regex
+* [tiktok] Fix `extractor_key` used in archive
+* [youtube] **End `live-from-start` properly when stream ends with 403**
+* [Zee5] Fix `_VALID_URL` for tv-shows by [Ashish0804](https://github.com/Ashish0804)
+
+### 2021.12.25
+
+* [dash,youtube] **Download live from start to end** by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan) (see the API sketch below)
+    * Add option `--live-from-start` to enable downloading live videos from start
+    * Add key `is_from_start` in formats to identify formats (of live videos) that download from start
+    * [dash] Create protocol `http_dash_segments_generator` that allows a function to be passed instead of fragments
+    * [fragment] Allow multiple live dash formats to download simultaneously
+    * [youtube] Implement fragment re-fetching for the live dash formats
+    * [youtube] Re-extract dash manifest every 5 hours (manifest expires in 6 hours)
+    * [postprocessor/ffmpeg] Add `FFmpegFixupDuplicateMoovPP` to fixup duplicated moov atoms
+    * Known issues:
+        * Ctrl+C doesn't work on Windows when downloading multiple formats
+        * If video becomes private, download hangs
+* [SponsorBlock] Add `Filler` and `Highlight` categories by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+    * Change `--sponsorblock-cut all` to `--sponsorblock-cut default` if you do not want filler sections to be removed
+* Add field `webpage_url_domain`
+* Add interactive format selection with `-f -`
+* Add option `--file-access-retries` by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [outtmpl] Add alternate forms `S`, `D` and improve `id` detection
+* [outtmpl] Add operator `&` for replacement text by [PilzAdam](https://github.com/PilzAdam)
+* [EmbedSubtitle] Disable duration check temporarily
+* [extractor] Add `_search_nuxt_data` by [nao20010128nao](https://github.com/nao20010128nao)
+* [extractor] Ignore errors in comment extraction when `-i` is given
+* [extractor] Standardize `_live_title`
+* [FormatSort] Prevent incorrect deprecation warning
+* [generic] Extract m3u8 formats from JSON-LD
+* [postprocessor/ffmpeg] Always add `faststart`
+* [utils] Fix parsing `YYYYMMDD` dates in Nov/Dec by [wlritchi](https://github.com/wlritchi)
+* [utils] Improve `parse_count`
+* [utils] Update `std_headers` by [kikuyan](https://github.com/kikuyan), [fstirlitz](https://github.com/fstirlitz)
+* [lazy_extractors] Fix for search IEs
+* [extractor] Support default implicit graph in JSON-LD by [zmousm](https://github.com/zmousm)
+* Allow `--no-write-thumbnail` to override `--write-all-thumbnails`
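+
+A minimal sketch of downloading a live stream from its beginning through the API; `live_from_start` mirrors `--live-from-start`, and the URL is a placeholder:
+
+```python
+import yt_dlp
+
+with yt_dlp.YoutubeDL({'live_from_start': True}) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=LIVE_VIDEO_ID'])
+```
+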
+* Fix `--throttled-rate`
+* Fix control characters being printed to `--console-title`
+* Fix PostProcessor hooks not registered for some PPs
+* Pre-process when using `--flat-playlist`
+* Remove known invalid thumbnails from `info_dict`
+* Add warning when using `-f best`
+* Use `parse_duration` for `--wait-for-video` and some minor fixes
+* [test/download] Add more fields
+* [test/download] Ignore field `webpage_url_domain` by [std-move](https://github.com/std-move)
+* [compat] Suppress errors in enabling VT mode
+* [docs] Improve manpage format by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [docs,cleanup] Minor fixes and cleanup
+* [cleanup] Fix some typos by [unit193](https://github.com/unit193)
+* [ABC:iview] Add show extractor by [pabs3](https://github.com/pabs3)
+* [dropout] Add extractor by [TwoThousandHedgehogs](https://github.com/TwoThousandHedgehogs), [pukkandan](https://github.com/pukkandan)
+* [GameJolt] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [gofile] Add extractor by [Jertzukka](https://github.com/Jertzukka), [Ashish0804](https://github.com/Ashish0804)
+* [hse] Add extractors by [cypheron](https://github.com/cypheron), [pukkandan](https://github.com/pukkandan)
+* [NateTV] Add NateIE and NateProgramIE by [Ashish0804](https://github.com/Ashish0804), [Hyeeji](https://github.com/Hyeeji)
+* [OpenCast] Add extractors by [bwildenhain](https://github.com/bwildenhain), [C0D3D3V](https://github.com/C0D3D3V)
+* [rtve] Add `RTVEAudioIE` by [kebianizao](https://github.com/kebianizao)
+* [Rutube] Add RutubeChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [skeb] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [soundcloud] Add related tracks extractor by [Lapin0t](https://github.com/Lapin0t)
+* [toggo] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [TrueID] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [audiomack] Update album and song `_VALID_URL` by [abdullah-if](https://github.com/abdullah-if), [dirkf](https://github.com/dirkf)
+* [CBC Gem] Extract 1080p formats by [DavidSkrundz](https://github.com/DavidSkrundz)
+* [ceskatelevize] Fetch iframe from nextJS data by [mkubecek](https://github.com/mkubecek)
+* [crackle] Look for non-DRM formats by [raleeper](https://github.com/raleeper)
+* [dplay] Temporary fix for `discoveryplus.com/it`
+* [DiscoveryPlusShowBaseIE] Yield actual video id by [Ashish0804](https://github.com/Ashish0804)
+* [Facebook] Handle redirect URLs
+* [fujitv] Extract 1080p from `tv_android` m3u8 by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [gronkh] Support new URL pattern by [Sematre](https://github.com/Sematre)
+* [instagram] Expand valid URL by [u-spec-png](https://github.com/u-spec-png)
+* [Instagram] Try bypassing login wall with embed page by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Jamendo] Fix use of `_VALID_URL_RE` by [jaller94](https://github.com/jaller94)
+* [LBRY] Support livestreams by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [NJPWWorld] Extract formats from m3u8 by [aarubui](https://github.com/aarubui)
+* [NovaEmbed] Update player regex by [std-move](https://github.com/std-move)
+* [npr] Make SMIL extraction non-fatal by [r5d](https://github.com/r5d)
+* [ntvcojp] Extract NUXT data by [nao20010128nao](https://github.com/nao20010128nao)
+* [ok.ru] Add mobile fallback by [nao20010128nao](https://github.com/nao20010128nao)
+* [olympics] Add uploader and cleanup by [u-spec-png](https://github.com/u-spec-png)
+* [ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* [PlutoTV] Expand `_VALID_URL`
+* [RaiNews] Fix extractor by [nixxo](https://github.com/nixxo)
+* [RCTIPlusSeries] Lazy extraction and video type selection by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [redtube] Handle formats delivered inside a JSON by [dirkf](https://github.com/dirkf), [nixxo](https://github.com/nixxo)
+* [SonyLiv] Add OTP login support by [Ashish0804](https://github.com/Ashish0804)
+* [Steam] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [TikTok] Pass cookies to mobile API by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [trovo] Fix inheritance of `TrovoChannelBaseIE`
+* [TVer] Extract better thumbnails by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [vimeo] Extract chapters
+* [web.archive:youtube] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:comments] Add more options for limiting number of comments extracted by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract more metadata from feeds/channels/playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract video thumbnails from playlist by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Ignore query when redirecting channel to playlist and cleanup of related code
+* [youtube] Fix `ytsearchdate`
+* [zdf] Support videos with different ptmd location by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Support /episodes in URL
+
+
+### 2021.12.01
+
+* **Add option `--wait-for-video` to wait for scheduled streams**
+* Add option `--break-per-input` to apply --break-on... to each input URL
to each input URL +* Add option `--embed-info-json` to embed info.json in mkv +* Add compat-option `embed-metadata` +* Allow using a custom format selector through API +* [AES] Add ECB mode by [nao20010128nao](https://github.com/nao20010128nao) +* [build] Fix MacOS Build +* [build] Save Git HEAD at release alongside version info +* [build] Use `workflow_dispatch` for release +* [downloader/ffmpeg] Fix for direct videos inside mpd manifests +* [downloader] Add colors to download progress +* [EmbedSubtitles] Slightly relax duration check and related cleanup +* [ExtractAudio] Fix conversion to `wav` and `vorbis` +* [ExtractAudio] Support `alac` +* [extractor] Extract `average_rating` from JSON-LD +* [FixupM3u8] Fixup MPEG-TS in MP4 container +* [generic] Support mpd manifests without extension by [shirt](https://github.com/shirt-dev) +* [hls] Better FairPlay DRM detection by [nyuszika7h](https://github.com/nyuszika7h) +* [jsinterp] Fix splice to handle float (for youtube js player f1ca6900) +* [utils] Allow alignment in `render_table` and add tests +* [utils] Fix `PagedList` +* [utils] Fix error when copying `LazyList` +* Clarify video/audio-only formats in -F +* Ensure directory exists when checking formats +* Ensure path for link files exists by [Zirro](https://github.com/Zirro) +* Ensure same config file is not loaded multiple times +* Fix `postprocessor_hooks` +* Fix `--break-on-archive` when pre-checking +* Fix `--check-formats` for `mhtml` +* Fix `--load-info-json` of playlists with failed entries +* Fix `--trim-filename` when filename has `.` +* Fix bug in parsing `--add-header` +* Fix error in `report_unplayable_conflict` by [shirt](https://github.com/shirt-dev) +* Fix writing playlist infojson with `--no-clean-infojson` +* Validate --get-bypass-country +* [blogger] Add extractor by [pabs3](https://github.com/pabs3) +* [breitbart] Add extractor by [Grabien](https://github.com/Grabien) +* [CableAV] Add extractor by [j54vc1bk](https://github.com/j54vc1bk) +* [CanalAlpha] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [CozyTV] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [CPTwentyFour] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [DiscoveryPlus] Add `DiscoveryPlusItalyShowIE` by [Ashish0804](https://github.com/Ashish0804) +* [ESPNCricInfo] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [LinkedIn] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [mixch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [nebula] Add `NebulaCollectionIE` and rewrite extractor by [hheimbuerger](https://github.com/hheimbuerger) +* [OneFootball] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [peer.tv] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [radiozet] Add extractor by [0xA7404A](https://github.com/0xA7404A) (Aurora) +* [redgifs] Add extractor by [chio0hai](https://github.com/chio0hai) +* [RedGifs] Add Search and User extractors by [Deer-Spangle](https://github.com/Deer-Spangle) +* [rtrfm] Add extractor by [pabs3](https://github.com/pabs3) +* [Streamff] Add extractor by [cntrl-s](https://github.com/cntrl-s) +* [Stripchat] Add extractor by [zulaport](https://github.com/zulaport) +* [Aljazeera] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [AmazonStoreIE] Fix regex to not match vdp urls by [Ashish0804](https://github.com/Ashish0804) +* [ARDBetaMediathek] Handle new URLs +* [bbc] Get all available formats by 
[nyuszika7h](https://github.com/nyuszika7h) +* [Bilibili] Fix title extraction by [u-spec-png](https://github.com/u-spec-png) +* [CBC Gem] Fix for shows that don't have all seasons by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [curiositystream] Add more metadata +* [CuriosityStream] Fix series +* [DiscoveryPlus] Rewrite extractors by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan) +* [HotStar] Set language field from tags by [Ashish0804](https://github.com/Ashish0804) +* [instagram, cleanup] Refactor extractors +* [Instagram] Display more login errors by [MinePlayersPE](https://github.com/MinePlayersPE) +* [itv] Fix extractor by [staubichsauger](https://github.com/staubichsauger), [pukkandan](https://github.com/pukkandan) +* [mediaklikk] Expand valid URL +* [MTV] Improve mgid extraction by [Sipherdrakon](https://github.com/Sipherdrakon), [kikuyan](https://github.com/kikuyan) +* [nexx] Better error message for unsupported format +* [NovaEmbed] Fix extractor by [pukkandan](https://github.com/pukkandan), [std-move](https://github.com/std-move) +* [PatreonUser] Do not capture RSS URLs +* [Reddit] Add support for 1080p videos by [xenova](https://github.com/xenova) +* [RoosterTeethSeries] Fix for multiple pages by [MinePlayersPE](https://github.com/MinePlayersPE) +* [sbs] Fix for movies and livestreams +* [Senate.gov] Add SenateGovIE and fix SenateISVPIE by [Grabien](https://github.com/Grabien), [pukkandan](https://github.com/pukkandan) +* [soundcloud:search] Fix pagination +* [tiktok:user] Set `webpage_url` correctly +* [Tokentube] Fix description by [u-spec-png](https://github.com/u-spec-png) +* [trovo] Fix extractor by [nyuszika7h](https://github.com/nyuszika7h) +* [tv2] Expand valid URL +* [Tvplayhome] Fix extractor by [pukkandan](https://github.com/pukkandan), [18928172992817182](https://github.com/18928172992817182) +* [Twitch:vod] Add chapters by [mpeter50](https://github.com/mpeter50) +* [twitch:vod] Extract live status by [DEvmIb](https://github.com/DEvmIb) +* [VidLii] Add 720p support by [mrpapersonic](https://github.com/mrpapersonic) +* [vimeo] Add fallback for config URL +* [vimeo] Sort http formats higher +* [WDR] Expand valid URL +* [willow] Add extractor by [aarubui](https://github.com/aarubui) +* [xvideos] Detect embed URLs by [4a1e2y5](https://github.com/4a1e2y5) +* [xvideos] Fix extractor by [Yakabuff](https://github.com/Yakabuff) +* [youtube, cleanup] Reorganize Tab and Search extractor inheritances +* [youtube:search_url] Add playlist/channel support +* [youtube] Add `default` player client by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Add storyboard formats +* [youtube] Decrypt n-sig for URLs with `ratebypass` +* [youtube] Minor improvement to format sorting +* [cleanup] Add deprecation warnings +* [cleanup] Refactor `JSInterpreter._seperate` +* [Cleanup] Remove some unnecessary groups in regexes by [Ashish0804](https://github.com/Ashish0804) +* [cleanup] Misc cleanup + + +### 2021.11.10.1 + +* Temporarily disable MacOS Build + +### 2021.11.10 + +* [youtube] **Fix throttling by decrypting n-sig** +* Merging extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai) + * [extractor] Add `_search_nextjs_data` + * [tvp] Fix extractors + * [tvp] Add TVPStreamIE + * [wppilot] Add extractors + * [polskieradio] Add extractors + * [radiokapital] Add extractors + * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), 
[sdomi](https://github.com/sdomi) +* Separate `--check-all-formats` from `--check-formats` +* Approximate filesize from bitrate +* Don't create console in `windows_enable_vt_mode` +* Fix bug in `--load-infojson` of playlists +* [minicurses] Add colors to `-F` and standardize color-printing code +* [outtmpl] Add type `link` for internet shortcut files +* [outtmpl] Add alternate forms for `q` and `j` +* [outtmpl] Do not traverse `None` +* [fragment] Fix progress display in fragmented downloads +* [downloader/ffmpeg] Fix vtt download with ffmpeg +* [ffmpeg] Detect presence of setts and libavformat version +* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) +* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal) +* [FormatSort] `eac3` is better than `ac3` +* [FormatSort] Fix some fields' defaults +* [generic] Detect more json_ld +* [generic] parse jwplayer with only the json URL +* [extractor] Add keyword automatically to SearchIE descriptions +* [extractor] Fix some errors being converted to `ExtractorError` +* [utils] Add `join_nonempty` +* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804) +* [utils] Create `DownloadCancelled` exception +* [utils] Parse `vp09` as vp9 +* [utils] Sanitize URL when determining protocol +* [test/download] Fallback test to `bv` +* [docs] Minor documentation improvements +* [cleanup] Improvements to error and debug messages +* [cleanup] Minor fixes and cleanup +* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804) +* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo) +* [MLSScoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png) +* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra) +* [roosterteeth] Add series extractor +* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8) +* [youtube] Fix sorting for some videos +* [youtube] Populate `thumbnail` with the best "known" thumbnail +* [youtube] Refactor itag processing +* [youtube] Remove unnecessary no-playlist warning +* [youtube:tab] Add Invidious list for playlists/channels by [rhendric](https://github.com/rhendric) +* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png) +* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger) +* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi) +* [crunchyroll] Add extractor-args `language` and `hardsub` +* [DiscoveryPlus] Allow language codes in URL +* [imdb] Fix thumbnail by [ozburo](https://github.com/ozburo) +* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png) +* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png) +* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png) +* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn) +* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8) +* [la7] Fix extractor by [nixxo](https://github.com/nixxo) +* [linkedin] Don't login multiple times +* [mtv] Fix some videos by 
[Sipherdrakon](https://github.com/Sipherdrakon) +* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png) +* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf) +* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [piksel] Fix sorting +* [twitter] Do not sort by codec +* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan) +* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger) +* [vimeo] Fix ondemand videos and direct URLs with hash +* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us) +* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804) +* [VRT] Fix login by [pgaig](https://github.com/pgaig) +* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h) +* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h) +* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png) + + +### 2021.10.22 + +* [build] Improvements + * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev) + * Release windows exe built with `py2exe` + * Enable lazy-extractors in releases + * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental) + * Clean up error reporting in update + * Refactor `pyinst.py`, misc cleanup and improve docs +* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804) +* [downloader] **Fix slow progress hooks** + * This was causing HLS/DASH downloads to be extremely slow in some situations +* [downloader/ffmpeg] Improve simultaneous download and merge +* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key +* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp +* [utils] Allow duration strings in `--match-filter` +* Add HDR information to formats +* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro) +* Calculate more fields for merged formats +* Do not verify thumbnail URLs unless `--check-formats` is specified +* Don't create console for subprocesses on Windows +* Fix `--restrict-filename` when used with default template +* Fix `check_formats` output being written to stdout when `-qv` +* Fix bug in storyboards +* Fix conflict b/w id and ext in format selection +* Fix verbose head not showing custom configs +* Load archive only after printing verbose head +* Make `duration_string` and `resolution` available in --match-filter +* Re-implement deprecated option `--id` +* Reduce default `--socket-timeout` +* Write verbose header to logger +* [outtmpl] Fix bug in expanding environment variables +* [cookies] Local State should be opened as utf-8 +* [extractor,utils] Detect more codecs/mimetypes +* [extractor] Detect `EXT-X-KEY` Apple FairPlay +* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi) +* [http] Retry on socket timeout and show the last encountered error +* [fragment] Print error message when skipping fragment +* [aria2c] Fix `--skip-unavailable-fragment` +* [SponsorBlock] Obey `extractor-retries` and `sleep-requests` +* [Merger] Do not add `aac_adtstoasc` to non-hls audio +* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari) +* [devscripts/run_tests] Use markers to 
filter tests by [sulyi](https://github.com/sulyi) +* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h) +* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman) +* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [crunchyroll] Add season to flat-playlist +* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code +* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804) +* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [hidive] Fix typo +* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804) +* [Hotstar] Raise appropriate error for DRM +* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png) +* [instagram] Show appropriate error when login is needed +* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai) +* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda) +* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de) +* [SkyNewsAU] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47) +* [tiktok] Fix typo and update tests +* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804) +* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182) +* [vimeo] Fix embedded `player.vimeo` +* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan) +* [youtube] Add auto-translated subtitles +* [youtube] Expose different formats with same itag +* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz) +* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png) +* [cleanup] Remove broken youtube login code +* [cleanup] Standardize timestamp formatting code +* [cleanup] Generalize `getcomments` implementation for extractors +* [cleanup] Simplify search extractors code +* [cleanup] misc + + +### 2021.10.10 + +* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor` +* [minicurses] Fix when printing to file +* [downloader] Fix throttledratelimit +* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud) +* [NovaPlay] Add extractor by [Bojidarist](https://github.com/Bojidarist) +* [ffmpeg] Revert "Set max probesize" - No longer needed +* [docs] Remove incorrect dependency on VC++10 +* [build] Allow to release without changelog + +### 2021.10.09 + +* Improved progress reporting + * Separate `--console-title` and `--no-progress` + * Add option `--progress` to show progress-bar even in quiet mode + * Fix and refactor `minicurses` and use it for all progress reporting + * Standardize use of terminal sequences and enable color support for windows 10 + * Add option `--progress-template` to customize progress-bar and console-title + * Add postprocessor hooks and progress reporting +* [postprocessor] Add plugin support with option `--use-postprocessor` +* [extractor] Extract storyboards from SMIL manifests by 
[fstirlitz](https://github.com/fstirlitz) +* [outtmpl] Alternate form of format type `l` for `\n` delimited list +* [outtmpl] Format type `U` for unicode normalization +* [outtmpl] Allow empty output template to skip a type of file +* Merge webm formats into mkv if thumbnails are to be embedded +* [adobepass] Add RCN as MSO by [jfogelman](https://github.com/jfogelman) +* [ciscowebex] Add extractor by [damianoamatruda](https://github.com/damianoamatruda) +* [Gettr] Add extractor by [i6t](https://github.com/i6t) +* [GoPro] Add extractor by [i6t](https://github.com/i6t) +* [N1] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [Theta] Add video extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Veo] Add extractor by [i6t](https://github.com/i6t) +* [Vupload] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [bbc] Extract better quality videos by [ajj8](https://github.com/ajj8) +* [Bilibili] Add subtitle converter by [u-spec-png](https://github.com/u-spec-png) +* [CBC] Cleanup tests by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [Douyin] Rewrite extractor by [MinePlayersPE](https://github.com/MinePlayersPE) +* [Funimation] Fix for /v/ urls by [pukkandan](https://github.com/pukkandan), [Jules-A](https://github.com/Jules-A) +* [Funimation] Sort formats according to the relevant extractor-args +* [Hidive] Fix duplicate and incorrect formats +* [HotStarSeries] Fix cookies by [Ashish0804](https://github.com/Ashish0804) +* [LinkedInLearning] Add subtitles by [Ashish0804](https://github.com/Ashish0804) +* [Mediaite] Relax valid url by [coletdjnz](https://github.com/coletdjnz) +* [Newgrounds] Add age_limit and fix duration by [u-spec-png](https://github.com/u-spec-png) +* [Newgrounds] Fix view count on songs by [u-spec-png](https://github.com/u-spec-png) +* [parliamentlive.tv] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [PolskieRadio] Fix extractors by [jakubadamw](https://github.com/jakubadamw), [u-spec-png](https://github.com/u-spec-png) +* [reddit] Add embedded url by [u-spec-png](https://github.com/u-spec-png) +* [reddit] Fix 429 by generating a random `reddit_session` by [AjaxGb](https://github.com/AjaxGb) +* [Rumble] Add RumbleChannelIE by [Ashish0804](https://github.com/Ashish0804) +* [soundcloud:playlist] Detect last page correctly +* [SovietsCloset] Add duration from m3u8 by [ChillingPepper](https://github.com/ChillingPepper) +* [Streamable] Add codecs by [u-spec-png](https://github.com/u-spec-png) +* [vidme] Remove extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [youtube:tab] Fallback to API when webpage fails to download by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix non-fatal errors in fetching player +* Fix `--flat-playlist` when neither IE nor id is known +* Fix `-f mp4` behaving differently from youtube-dl +* Workaround for bug in `ssl.SSLContext.load_default_certs` +* [aes] Improve performance slightly by [sulyi](https://github.com/sulyi) +* [cookies] Fix keyring fallback by [mbway](https://github.com/mbway) +* [embedsubtitle] Fix error when duration is unknown +* [ffmpeg] Fix error when subtitle file is missing +* [ffmpeg] Set max probesize to workaround AAC HLS stream issues by [shirt](https://github.com/shirt-dev) +* [FixupM3u8] Remove redundant run if merged is needed +* [hls] Fix decryption issues by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan) +* [http] Respect user-provided chunk size over extractor's +* 
[utils] Let traverse_obj accept functions as keys +* [docs] Add note about our custom ffmpeg builds +* [docs] Write embedding and contributing documentation by [pukkandan](https://github.com/pukkandan), [timethrow](https://github.com/timethrow) +* [update] Check for new version even if not updateable +* [build] Add more files to the tarball +* [build] Allow building with py2exe (and misc fixes) +* [build] Use pycryptodomex by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan) +* [cleanup] Some minor refactoring, improve docs and misc cleanup + + +### 2021.09.25 + +* Add new option `--netrc-location` +* [outtmpl] Allow alternate fields using `,` +* [outtmpl] Add format type `B` to treat the value as bytes, e.g. to limit the filename to a certain number of bytes +* Separate the options `--ignore-errors` and `--no-abort-on-error` +* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao) +* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [bilibili] Add BiliIntlIE and BiliIntlSeriesIE by [Ashish0804](https://github.com/Ashish0804) +* [CAM4] Add extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Chingari] Add extractors by [Ashish0804](https://github.com/Ashish0804) +* [CGTN] Add extractor by [chao813](https://github.com/chao813) +* [damtomo] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [gotostage] Add extractor by [poschi3](https://github.com/poschi3) +* [Koo] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [Mediaite] Add Extractor by [Ashish0804](https://github.com/Ashish0804) +* [Mediaklikk] Add Extractor by [tmarki](https://github.com/tmarki), [mrx23dot](https://github.com/mrx23dot), [coletdjnz](https://github.com/coletdjnz) +* [MuseScore] Add Extractor by [Ashish0804](https://github.com/Ashish0804) +* [Newgrounds] Add NewgroundsUserIE and improve extractor by [u-spec-png](https://github.com/u-spec-png) +* [nzherald] Add NZHeraldIE by [coletdjnz](https://github.com/coletdjnz) +* [Olympics] Add replay extractor by [Ashish0804](https://github.com/Ashish0804) +* [Peertube] Add channel and playlist extractors by [u-spec-png](https://github.com/u-spec-png) +* [radlive] Add extractor by [nyuszika7h](https://github.com/nyuszika7h) +* [SovietsCloset] Add extractor by [ChillingPepper](https://github.com/ChillingPepper) +* [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804) +* [9Now] handle episodes of series by [dalanmiller](https://github.com/dalanmiller) +* [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon) +* [Arte] Improve description extraction by [renalid](https://github.com/renalid) +* [atv.at] Use jwt for API by [NeroBurner](https://github.com/NeroBurner) +* [brightcove] Extract subtitles from manifests +* [CBC] Fix CBC Gem extractors by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [cbs] Report appropriate error for DRM +* [comedycentral] Support `collection-playlist` by [nixxo](https://github.com/nixxo) +* [DIYNetwork] Support new format by [Sipherdrakon](https://github.com/Sipherdrakon) +* [downloader/niconico] Pass custom headers by [nao20010128nao](https://github.com/nao20010128nao) +* [dw] Fix extractor +* [Fancode] Fix live 
streams by [zenerdi0de](https://github.com/zenerdi0de) +* [funimation] Fix for locations outside US by [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan) +* [globo] Fix GloboIE by [Ashish0804](https://github.com/Ashish0804) +* [HiDive] Fix extractor by [Ashish0804](https://github.com/Ashish0804) +* [Hotstar] Add referer for subs by [Ashish0804](https://github.com/Ashish0804) +* [itv] Fix extractor, add subtitles and thumbnails by [coletdjnz](https://github.com/coletdjnz), [sleaux-meaux](https://github.com/sleaux-meaux), [Vangelis66](https://github.com/Vangelis66) +* [lbry] Show error message from API response +* [Mxplayer] Use mobile API by [Ashish0804](https://github.com/Ashish0804) +* [NDR] Rewrite NDRIE by [Ashish0804](https://github.com/Ashish0804) +* [Nuvid] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [Oreilly] Handle new web url by [MKSherbini](https://github.com/MKSherbini) +* [pbs] Fix subtitle extraction by [coletdjnz](https://github.com/coletdjnz), [gesa](https://github.com/gesa), [raphaeldore](https://github.com/raphaeldore) +* [peertube] Update instances by [u-spec-png](https://github.com/u-spec-png) +* [plutotv] Fix extractor for URLs with `/en` +* [reddit] Workaround for 429 by redirecting to old.reddit.com +* [redtube] Fix exts +* [soundcloud] Make playlist extraction lazy +* [soundcloud] Retry playlist pages on `502` error and update `_CLIENT_ID` +* [southpark] Fix SouthParkDE by [coletdjnz](https://github.com/coletdjnz) +* [SovietsCloset] Fix playlists for games with only named categories by [ConquerorDopy](https://github.com/ConquerorDopy) +* [SpankBang] Fix uploader by [f4pp3rk1ng](https://github.com/f4pp3rk1ng), [coletdjnz](https://github.com/coletdjnz) +* [tiktok] Use API to fetch higher quality video by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47) +* [TikTokUser] Fix extractor using mobile API by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47) +* [videa] Fix some extraction errors by [nyuszika7h](https://github.com/nyuszika7h) +* [VrtNU] Handle login errors by [llacb47](https://github.com/llacb47) +* [vrv] Don't raise error when thumbnails are missing +* [youtube] Cleanup authentication code by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix `--mark-watched` with `--cookies-from-browser` +* [youtube] Improvements to JS player extraction and add extractor-args to skip it by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Retry on 'Unknown Error' by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Return full URL instead of just ID +* [youtube] Warn when trying to download clips +* [zdf] Improve format sorting +* [zype] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz) +* Allow `--force-write-archive` to work with `--flat-playlist` +* Download subtitles in order of `--sub-langs` +* Allow `0` in `--playlist-items` +* Handle more playlist errors with `-i` +* Fix `--no-get-comments` +* Fix `extra_info` being reused across runs +* Fix compat options `no-direct-merge` and `playlist-index` +* Dump files should obey `--trim-filename` by [sulyi](https://github.com/sulyi) +* [aes] Add `aes_gcm_decrypt_and_verify` by [sulyi](https://github.com/sulyi), [pukkandan](https://github.com/pukkandan) +* [aria2c] Fix IV for some AES-128 streams by [shirt](https://github.com/shirt-dev) +* [compat] Don't ignore `HOME` (if set) on windows +* [cookies] Make browser names case insensitive +* [cookies] 
Print warning for cookie decoding error only once +* [extractor] Fix root-relative URLs in MPD by [DigitalDJ](https://github.com/DigitalDJ) +* [ffmpeg] Add `aac_adtstoasc` when merging if needed +* [fragment,aria2c] Generalize and refactor some code +* [fragment] Avoid repeated request for AES key +* [fragment] Fix range header when using `-N` and media sequence by [shirt](https://github.com/shirt-dev) +* [hls,aes] Fallback to native implementation for AES-CBC and detect `Cryptodome` in addition to `Crypto` +* [hls] Byterange + AES128 is supported by native downloader +* [ModifyChapters] Improve sponsor chapter merge algorithm by [nihil-admirari](https://github.com/nihil-admirari) +* [ModifyChapters] Minor fixes +* [WebVTT] Adjust parser to accommodate PBS subtitles +* [utils] Improve `extract_timezone` by [dirkf](https://github.com/dirkf) +* [options] Fix `--no-config` and refactor reading of config files +* [options] Strip spaces and ignore empty entries in list-like switches +* [test/cookies] Improve logging +* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan) +* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari) +* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao) +* [build] Provide `--onedir` zip for windows +* [cleanup,docs] Add deprecation warning in docs for some counter intuitive behaviour +* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden) +* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi) +* [cleanup] Misc + + +### 2021.09.02 + +* **Native SponsorBlock** implementation by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan) + * `--sponsorblock-remove CATS` removes specified chapters from file + * `--sponsorblock-mark CATS` marks the specified sponsor sections as chapters + * `--sponsorblock-chapter-title TMPL` to specify sponsor chapter template + * `--sponsorblock-api URL` to use a different API + * No re-encoding is done unless `--force-keyframes-at-cuts` is used + * The fetched sponsor sections are written to the infojson + * Deprecates: `--sponskrub`, `--no-sponskrub`, `--sponskrub-cut`, `--no-sponskrub-cut`, `--sponskrub-force`, `--no-sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` +* Split `--embed-chapters` from `--embed-metadata` (it still implies the former by default) +* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan) +* Add option `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters by [nihil-admirari](https://github.com/nihil-admirari) +* Let `--match-filter` reject entries early + * Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views` +* [lazy_extractor] Improvements (It now passes all tests) + * Bugfix for when plugin directory doesn't exist by [kidonng](https://github.com/kidonng) + * Create instance only after pre-checking archive + * Import actual class if an attribute is accessed + * Fix `suitable` and add flake8 test +* [downloader/ffmpeg] Experimental support for DASH manifests (including live) + * Your ffmpeg must have [this patch](https://github.com/FFmpeg/FFmpeg/commit/3249c757aed678780e22e99a1a49f4672851bca9) applied for YouTube DASH to work +* [downloader/ffmpeg] Allow passing custom arguments before `-i` +* 
[BannedVideo] Add extractor by [smege1001](https://github.com/smege1001), [blackjack4494](https://github.com/blackjack4494), [pukkandan](https://github.com/pukkandan) +* [bilibili] Add category extractor by [animelover1984](https://github.com/animelover1984) +* [Epicon] Add extractors by [Ashish0804](https://github.com/Ashish0804) +* [filmmodu] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque) +* [GabTV] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [Hungama] Fix `HungamaSongIE` and add `HungamaAlbumPlaylistIE` by [Ashish0804](https://github.com/Ashish0804) +* [ManotoTV] Add new extractors by [tandy1000](https://github.com/tandy1000) +* [Niconico] Add Search extractors by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan) +* [Patreon] Add `PatreonUserIE` by [zenerdi0de](https://github.com/zenerdi0de) +* [peloton] Add extractor by [IONECarter](https://github.com/IONECarter), [capntrips](https://github.com/capntrips), [pukkandan](https://github.com/pukkandan) +* [ProjectVeritas] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [radiko] Add extractors by [nao20010128nao](https://github.com/nao20010128nao) +* [StarTV] Add extractor for `startv.com.tr` by [mrfade](https://github.com/mrfade), [coletdjnz](https://github.com/coletdjnz) +* [tiktok] Add `TikTokUserIE` by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan) +* [Tokentube] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` by [Ashish0804](https://github.com/Ashish0804) +* [voicy] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [adobepass] Fix Verizon SAML login by [nyuszika7h](https://github.com/nyuszika7h), [ParadoxGBB](https://github.com/ParadoxGBB) +* [afreecatv] Fix adult VODs by [wlritchi](https://github.com/wlritchi) +* [afreecatv] Tolerate failure to parse date string by [wlritchi](https://github.com/wlritchi) +* [aljazeera] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE) +* [ATV.at] Fix extractor for ATV.at by [NeroBurner](https://github.com/NeroBurner), [coletdjnz](https://github.com/coletdjnz) +* [bitchute] Fix test by [mahanstreamer](https://github.com/mahanstreamer) +* [camtube] Remove obsolete extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [CDA] Add more formats by [u-spec-png](https://github.com/u-spec-png) +* [eroprofile] Fix page skipping in albums by [jhwgh1968](https://github.com/jhwgh1968) +* [facebook] Fix format sorting +* [facebook] Fix metadata extraction by [kikuyan](https://github.com/kikuyan) +* [facebook] Update onion URL by [Derkades](https://github.com/Derkades) +* [HearThisAtIE] Fix extractor by [Ashish0804](https://github.com/Ashish0804) +* [instagram] Add referrer to prevent throttling by [u-spec-png](https://github.com/u-spec-png), [kikuyan](https://github.com/kikuyan) +* [iwara.tv] Extract more metadata by [BunnyHelp](https://github.com/BunnyHelp) +* [iwara] Add thumbnail by [i6t](https://github.com/i6t) +* [kakao] Fix extractor +* [mediaset] Fix extraction for some videos by [nyuszika7h](https://github.com/nyuszika7h) +* [Motherless] Fix extractor by [coletdjnz](https://github.com/coletdjnz) +* [Nova] fix extractor by [std-move](https://github.com/std-move) +* [ParamountPlus] Fix geo verification by [shirt](https://github.com/shirt-dev) +* [peertube] handle new video URL format by [Chocobozzz](https://github.com/Chocobozzz) +* [pornhub] Separate and fix 
playlist extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [reddit] Fix for quarantined subreddits by [ouwou](https://github.com/ouwou)
+* [ShemarooMe] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [soundcloud] Refetch `client_id` on 403
+* [tiktok] Fix metadata extraction
+* [TV2] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [tv5mondeplus] Fix extractor by [korli](https://github.com/korli)
+* [VH1,TVLand] Fix extractors by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Viafree] Fix extractor and extract subtitles by [coletdjnz](https://github.com/coletdjnz)
+* [XHamster] Extract `uploader_id` by [octotherp](https://github.com/octotherp)
+* [youtube] Add `shorts` to `_VALID_URL`
+* [youtube] Add av01 itags to known formats list by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] Extract error messages from HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix subtitle names
+* [youtube] Prefer audio stream that YouTube considers default
+* [youtube] Remove annotations and deprecate `--write-annotations` by [coletdjnz](https://github.com/coletdjnz)
+* [Zee5] Fix extractor and add subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [aria2c] Obey `--rate-limit`
+* [EmbedSubtitle] Continue even if some files are missing
+* [extractor] Better error message for DRM
+* [extractor] Common function `_match_valid_url`
+* [extractor] Show video id in error messages if possible
+* [FormatSort] Remove priority of `lang`
+* [options] Add `_set_from_options_callback`
+* [SubtitleConvertor] Fix bug during subtitle conversion
+* [utils] Add `parse_qs`
+* [webvtt] Fix timestamp overflow adjustment by [fstirlitz](https://github.com/fstirlitz)
+* Bugfix for `--replace-in-metadata`
+* Don't try to merge with final extension
+* Fix `--force-overwrites` when using `-k`
+* Fix `--no-prefer-free-formats` by [CeruleanSky](https://github.com/CeruleanSky)
+* Fix `-F` for extractors that directly return a URL
+* Fix `-J` when there are failed videos
+* Fix `extra_info` being reused across runs
+* Fix `playlist_index` not obeying `playlist_start` and add tests
+* Fix resuming of single formats when using `--no-part`
+* Revert erroneous use of the `Content-Length` header by [fstirlitz](https://github.com/fstirlitz)
+* Use `os.replace` where applicable by [paulwrubel](https://github.com/paulwrubel)
+* [build] Add homebrew taps `yt-dlp/taps/yt-dlp` by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Fix bug in making `yt-dlp.tar.gz`
+* [docs] Fix some typos by [pukkandan](https://github.com/pukkandan), [zootedb0t](https://github.com/zootedb0t)
+* [cleanup] Replace improper use of tab in trovo by [glenn-slayden](https://github.com/glenn-slayden)
+
+
+### 2021.08.10
+
+* Add option `--replace-in-metadata`
+* Add option `--no-simulate` to not simulate even when `--print` or `--list...` are used - Deprecates `--print-json`
+* Allow entire infodict to be printed using `%()s` - makes `--dump-json` redundant
+* Allow multiple `--exec` and `--exec-before-download`
+* Add regex to `--match-filter` (see the example below)
+* Add all format filtering operators also to `--match-filter` by [max-te](https://github.com/max-te)
+* Add compat-option `no-keep-subs`
+* [adobepass] Add MSO Cablevision by [Jessecar96](https://github.com/Jessecar96)
+* [BandCamp] Add BandcampMusicIE by [Ashish0804](https://github.com/Ashish0804)
+* [blackboardcollaborate] Add new extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [eroprofile] Add album downloader by [jhwgh1968](https://github.com/jhwgh1968)
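+
+A minimal sketch of the metadata and filtering options above (editorial example: the URL is a placeholder, and the `~=` regex operator and `&` joining follow the `--match-filter` syntax this release extends):
+
+```shell
+# Normalize the title before it is used elsewhere, and only accept videos
+# longer than 60 seconds whose title matches a regex
+yt-dlp --replace-in-metadata title "[ _]" "-" \
+       --match-filter "duration > 60 & title ~= (?i)interview" \
+       "https://example.com/video"
+```
+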
+* [mirrativ] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [openrec] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [nbcolympics:stream] Fix extractor by [nchilada](https://github.com/nchilada), [pukkandan](https://github.com/pukkandan)
+* [nbcolympics] Update extractor for 2020 Olympics by [wesnm](https://github.com/wesnm)
+* [paramountplus] Separate extractor and fix some titles by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
+* [RCTIPlus] Support events and TV by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Newgrounds] Improve extractor and fix playlist by [u-spec-png](https://github.com/u-spec-png)
+* [aenetworks] Update `_THEPLATFORM_KEY` and `_THEPLATFORM_SECRET` by [wesnm](https://github.com/wesnm)
+* [crunchyroll] Fix thumbnail by [funniray](https://github.com/funniray)
+* [HotStar] Use API for metadata and extract subtitles by [Ashish0804](https://github.com/Ashish0804)
+* [instagram] Fix comments extraction by [u-spec-png](https://github.com/u-spec-png)
+* [peertube] Fix videos without description by [u-spec-png](https://github.com/u-spec-png)
+* [twitch:clips] Extract `display_id` by [dirkf](https://github.com/dirkf)
+* [viki] Print error message from API request
+* [Vine] Remove invalid formats by [u-spec-png](https://github.com/u-spec-png)
+* [VrtNU] Fix XSRF token by [pgaig](https://github.com/pgaig)
+* [vrv] Fix thumbnail extraction by [funniray](https://github.com/funniray)
+* [youtube] Add extractor-arg `include-live-dash` to show live dash formats
+* [youtube] Improve signature function detection by [PSlava](https://github.com/PSlava)
+* [youtube] Raise appropriate error when API pages can't be downloaded
+* Ensure `_write_ytdl_file` closes file handle on error
+* Fix `--compat-options filename` by [stdedos](https://github.com/stdedos)
+* Fix issues with infodict sanitization
+* Fix resuming when using `--no-part`
+* Fix wrong extension for intermediate files
+* Handle `BrokenPipeError` by [kikuyan](https://github.com/kikuyan)
+* Show libraries present in verbose head
+* [extractor] Detect `sttp` as subtitles in MPD by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Reset non-repeating warnings per video
+* [ffmpeg] Fix streaming `mp4` to `stdout`
+* [ffmpeg] Allow `--ffmpeg-location` to be a file with a different name
+* [utils] Fix `InAdvancePagedList.__getitem__`
+* [utils] Fix `traverse_obj` depth when `is_user_input`
+* [webvtt] Merge daisy-chained duplicate cues by [fstirlitz](https://github.com/fstirlitz)
+* [build] Use custom build of `pyinstaller` by [shirt](https://github.com/shirt-dev)
+* [tests:download] Add batch testing for extractors (`test_YourExtractor_all`)
+* [docs] Document which fields `--add-metadata` adds to the file
+* [docs] Fix some mistakes and improve docs
+* [cleanup] Misc code cleanup
+
+
+### 2021.08.02
+
+* Add logo, banner and donate links
+* [outtmpl] Expand and escape environment variables
+* [outtmpl] Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal)
+* [downloader] Allow streaming some unmerged formats to stdout using ffmpeg
+* [youtube] **Age-gate bypass**
+    * Add `agegate` clients by [pukkandan](https://github.com/pukkandan), [MinePlayersPE](https://github.com/MinePlayersPE)
+    * Add `thirdParty` to agegate clients to bypass more videos
+    * Simplify client definitions, expose `embedded` clients
+    * Improve age-gate detection by
[coletdjnz](https://github.com/coletdjnz) + * Fix default global API key by [coletdjnz](https://github.com/coletdjnz) + * Add `creator` clients for age-gate bypass using unverified accounts by [zerodytrash](https://github.com/zerodytrash), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [adobepass] Add MSO Sling TV by [wesnm](https://github.com/wesnm) +* [CBS] Add ParamountPlusSeriesIE by [Ashish0804](https://github.com/Ashish0804) +* [dplay] Add `ScienceChannelIE` by [Sipherdrakon](https://github.com/Sipherdrakon) +* [UtreonIE] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [youtube] Add `mweb` client by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Add `player_client=all` +* [youtube] Force `hl=en` for comments by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix format sorting when using alternate clients +* [youtube] Misc cleanup by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz) +* [youtube] Extract SAPISID only once +* [CBS] Add fallback by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan) +* [Hotstar] Support cookies by [Ashish0804](https://github.com/Ashish0804) +* [HotStarSeriesIE] Fix regex by [Ashish0804](https://github.com/Ashish0804) +* [bilibili] Improve `_VALID_URL` +* [mediaset] Fix extraction by [nixxo](https://github.com/nixxo) +* [Mxplayer] Add h265 formats by [Ashish0804](https://github.com/Ashish0804) +* [RCTIPlus] Remove PhantomJS dependency by [MinePlayersPE](https://github.com/MinePlayersPE) +* [tenplay] Add MA15+ age limit by [pento](https://github.com/pento) +* [vidio] Fix login error detection by [MinePlayersPE](https://github.com/MinePlayersPE) +* [vimeo] Better extraction of original file by [Ashish0804](https://github.com/Ashish0804) +* [generic] Support KVS player (replaces ThisVidIE) by [rigstot](https://github.com/rigstot) +* Add compat-option `no-clean-infojson` +* Remove `asr` appearing twice in `-F` +* Set `home:` as the default key for `-P` +* [utils] Fix slicing of reversed `LazyList` +* [FormatSort] Fix bug for audio with unknown codec +* [test:download] Support testing with `ignore_no_formats_error` +* [cleanup] Refactor some code + + +### 2021.07.24 + +* [youtube:tab] Extract video duration early +* [downloader] Pass `info_dict` to `progress_hook`s +* [youtube] Fix age-gated videos for API clients when cookies are supplied by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Disable `get_video_info` age-gate workaround - This endpoint seems to be completely dead +* [youtube] Try all clients even if age-gated +* [youtube] Fix subtitles only being extracted from the first client +* [youtube] Simplify `_get_text` +* [cookies] bugfix for microsoft edge on macOS +* [cookies] Handle `sqlite` `ImportError` gracefully by [mbway](https://github.com/mbway) +* [cookies] Handle errors when importing `keyring` + +### 2021.07.21 + +* **Add option `--cookies-from-browser`** to load cookies from a browser by [mbway](https://github.com/mbway) + * Usage: `--cookies-from-browser BROWSER[:PROFILE_NAME_OR_PATH]` + * Also added `--no-cookies-from-browser` + * To decrypt chromium cookies, `keyring` is needed for UNIX and `pycryptodome` for Windows +* Add option `--exec-before-download` +* Add field `live_status` +* [FFmpegMetadata] Add language of each stream and some refactoring +* [douyin] Add extractor by [pukkandan](https://github.com/pukkandan), [pyx](https://github.com/pyx) +* [pornflip] Add extractor by 
[mzbaulhaque](https://github.com/mzbaulhaque)
+* **[youtube] Extract data from multiple clients** by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz) (see the example below)
+    * `player_client` now accepts multiple clients
+    * Default `player_client` = `android,web`
+    * This uses twice as many requests, but avoids throttling for most videos while also not losing any formats
+    * Music clients can be specifically requested and are enabled by default for `music.youtube.com` URLs
+    * Added `player_client=ios` (Known issue: formats from ios are not sorted correctly)
+    * Add age-gate bypass for android and ios clients
+* [youtube] Extract more thumbnails
+    * The thumbnail URLs are hard-coded and their actual existence is tested lazily
+    * Added option `--no-check-formats` to not test them
+* [youtube] Misc fixes
+    * Improve extraction of livestream metadata by [pukkandan](https://github.com/pukkandan), [krichbanana](https://github.com/krichbanana)
+    * Hide live dash formats since they can't be downloaded anyway
+    * Fix authentication when using multiple accounts by [coletdjnz](https://github.com/coletdjnz)
+    * Fix controversial videos when requested via API by [coletdjnz](https://github.com/coletdjnz)
+    * Fix session index extraction and headers for non-web player clients by [coletdjnz](https://github.com/coletdjnz)
+    * Make `--extractor-retries` work for more errors
+    * Fix sorting of 3gp format
+    * Sanity check `chapters` (and refactor related code)
+    * Make `parse_time_text` and `_extract_chapters` non-fatal
+    * Misc cleanup and bug fixes by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Fix channels tab
+* [youtube:tab] Extract playlist availability by [coletdjnz](https://github.com/coletdjnz)
+* **[youtube:comments] Move comment extraction to new API** by [coletdjnz](https://github.com/coletdjnz)
+    * Adds extractor-args `comment_sort` (`top`/`new`), `max_comments`, `max_comment_depth`
+* [youtube:comments] Fix `is_favorited`, improve `like_count` parsing by [coletdjnz](https://github.com/coletdjnz)
+* [BravoTV] Improve metadata extraction by [kevinoconnor7](https://github.com/kevinoconnor7)
+* [crunchyroll:playlist] Force http
+* [yahoo:gyao:player] Relax `_VALID_URL` by [nao20010128nao](https://github.com/nao20010128nao)
+* [nebula] Authentication via tokens from cookie jar by [hheimbuerger](https://github.com/hheimbuerger), [TpmKranz](https://github.com/TpmKranz)
+* [RTP] Fix extraction and add subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [viki] Rewrite extractors and add extractor-arg `video_types` to `vikichannel` by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
+* [vlive] Extract thumbnail directly in addition to the one from Naver
+* [generic] Extract previously missed subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [generic] Extract everything in the SMIL manifest and detect discarded subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [embedthumbnail] Fix `_get_thumbnail_resolution`
+* [metadatafromfield] Do not detect numbers as field names
+* Fix selectors `all`, `mergeall` and add tests
+* Errors in playlist extraction should obey `--ignore-errors`
+* Fix bug where `original_url` was not propagated when `_type`=`url`
+* Revert "Merge webm formats into mkv if thumbnails are to be embedded (#173)"
+    * This was wrongly checking for `write_thumbnail`
+* Improve `extractor_args` parsing
+* Rename `NOTE` in `-F` to `MORE INFO` since it's often confused with `format_note`
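+
+A sketch of requesting multiple player clients and capping comment extraction via `--extractor-args`, as referenced above (editorial example: the URL is a placeholder, and `;` separating multiple arguments for one extractor follows the documented extractor-args syntax):
+
+```shell
+# Extract from both the android and web clients, and limit comments
+yt-dlp --extractor-args "youtube:player_client=android,web;max_comments=100" \
+       --write-comments "https://www.youtube.com/watch?v=XXXXXXXXXXX"
+```
+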
+* Add `only_once` param for `write_debug` and `report_warning`
+* [extractor] Allow extracting multiple groups in `_search_regex` by [fstirlitz](https://github.com/fstirlitz)
+* [utils] Improve `traverse_obj`
+* [utils] Add `variadic`
+* [utils] Improve `js_to_json` comment regex by [fstirlitz](https://github.com/fstirlitz)
+* [webvtt] Fix timestamps
+* [compat] Remove unnecessary code
+* [docs] Fix default of multistreams
+
+
+### 2021.07.07
+
+* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
+* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments)
+    * Add extractor option `skip` for `youtube`, e.g. `--extractor-args youtube:skip=hls,dash`
+    * Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
+* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
+* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
+    * **Fixes youtube premium music** (format 141) extraction
+    * Adds extractor option `player_client` = `web`/`android`
+    * **`--extractor-args youtube:player_client=android` works around the throttling** for the time being
+    * Adds extractor option `player_skip=config`
+    * Adds age-gate fallback using embedded client
+* [youtube] Choose correct Live chat API for upcoming streams by [krichbanana](https://github.com/krichbanana)
+* [youtube] Fix subtitle names for age-gated videos
+* [youtube:comments] Fix error handling and add `itct` to params by [coletdjnz](https://github.com/coletdjnz)
+* [youtube_live_chat] Fix download with cookies by [siikamiika](https://github.com/siikamiika)
+* [youtube_live_chat] Use `clickTrackingParams` by [siikamiika](https://github.com/siikamiika)
+* [Funimation] Rewrite extractor
+    * Add `FunimationShowIE` by [Mevious](https://github.com/Mevious)
+    * **Treat the different versions of an episode as different formats of a single video**
+    * This changes the video `id` and will break existing archives
+    * Compat option `seperate-video-versions` to fall back to old behavior including using the old video ids
+    * Support direct `/player/` URL
+    * Extractor options `language` and `version` to pre-select them during extraction (see the example below)
+    * These options may be removed in the future if we can extract all formats without additional network requests
+    * Do not rely on these for format selection and use `-f` filters instead
+* [AdobePass] Add Spectrum MSO by [kevinoconnor7](https://github.com/kevinoconnor7), [ohmybahgosh](https://github.com/ohmybahgosh)
+* [facebook] Extract description and fix title
+* [fancode] Fix extraction, support live and allow login with refresh token by [zenerdi0de](https://github.com/zenerdi0de)
+* [plutotv] Improve `_VALID_URL`
+* [RCTIPlus] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Soundcloud] Allow login using oauth token by [blackjack4494](https://github.com/blackjack4494)
+* [TBS] Support livestreams by [llacb47](https://github.com/llacb47)
+* [videa] Fix extraction by [nyuszika7h](https://github.com/nyuszika7h)
+* [yahoo] Fix extraction by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
+* Process videos when using `--ignore-no-formats-error` by [krichbanana](https://github.com/krichbanana)
+* Fix `--throttled-rate` when using `--load-info-json`
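+
+A sketch of the Funimation extractor options named above (editorial example: the URL is a placeholder, and per the caveat above these options are for pre-selection during extraction, not for format selection):
+
+```shell
+# Pre-select the uncut English version during extraction; use "-f" filters
+# to actually choose among the resulting formats
+yt-dlp --extractor-args "funimation:language=english;version=uncut" \
+       "https://www.funimation.com/player/XXXXX"
+```
+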
+* Fix `--flat-playlist` when entry has no `ie_key` +* Fix `check_formats` catching `ExtractorError` instead of `DownloadError` +* Fix deprecated option `--list-formats-old` +* [downloader/ffmpeg] Fix `--ppa` when using simultaneous download +* [extractor] Prevent unnecessary download of hls manifests and refactor `hls_split_discontinuity` +* [fragment] Handle status of download and errors in threads correctly; and minor refactoring +* [thumbnailsconvertor] Treat `jpeg` as `jpg` +* [utils] Fix issues with `LazyList` reversal +* [extractor] Allow extractors to set their own login hint +* [cleanup] Simplify format selector code with `LazyList` and `yield from` +* [cleanup] Clean `extractor.common._merge_subtitles` signature +* [cleanup] Fix some typos + + +### 2021.06.23 + +* Merge youtube-dl: Upto [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961) +* **Add option `--throttled-rate`** below which video data is re-extracted +* [fragment] **Merge during download for `-N`**, and refactor `hls`/`dash` +* [websockets] Add `WebSocketFragmentFD` by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan) +* Allow `images` formats in addition to video/audio +* [downloader/mhtml] Add new downloader for slideshows/storyboards by [fstirlitz](https://github.com/fstirlitz) +* [youtube] Temporary **fix for age-gate** +* [youtube] Support ongoing live chat by [siikamiika](https://github.com/siikamiika) +* [youtube] Improve SAPISID cookie handling by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Login is not needed for `:ytrec` +* [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz) +* [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao) +* [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz) +* [funimation] Extract subtitles +* [pornhub] Extract `cast` +* [hotstar] Use server time for authentication instead of local time +* [EmbedThumbnail] Fix for already downloaded thumbnail +* [EmbedThumbnail] Add compat-option `embed-thumbnail-atomicparsley` +* Expand `--check-formats` to thumbnails +* Fix id sanitization in filenames +* Skip fixup of existing files and add `--fixup force` to force it +* Better error handling of syntax errors in `-f` +* Use `NamedTemporaryFile` for `--check-formats` +* [aria2c] Lower `--min-split-size` for HTTP downloads +* [options] Rename `--add-metadata` to `--embed-metadata` +* [utils] Improve `LazyList` and add tests +* [build] Build Windows x86 version with py3.7 and remove redundant tests by [pukkandan](https://github.com/pukkandan), [shirt](https://github.com/shirt-dev) +* [docs] Clarify that `--embed-metadata` embeds chapter markers +* [cleanup] Refactor fixup + + +### 2021.06.09 + +* Fix bug where `%(field)d` in filename template throws error +* [outtmpl] Improve offset parsing +* [test] More rigorous tests for `prepare_filename` + +### 2021.06.08 + +* Remove support for obsolete Python versions: Only 3.6+ is now supported +* Merge youtube-dl: Upto [commit/c2350ca](https://github.com/ytdl-org/youtube-dl/commit/c2350cac243ba1ec1586fe85b0d62d1b700047a2) +* [hls] Fix decryption for multithreaded downloader +* [extractor] Fix pre-checking archive for some extractors +* [extractor] Fix FourCC fallback when parsing ISM by [fstirlitz](https://github.com/fstirlitz) +* [twitcasting] Add TwitCastingUserIE, TwitCastingLiveIE by [pukkandan](https://github.com/pukkandan), 
[nao20010128nao](https://github.com/nao20010128nao)
+* [vidio] Add VidioPremierIE and VidioLiveIE by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [viki] Fix extraction from [ytdl-org/youtube-dl@59e583f](https://github.com/ytdl-org/youtube-dl/commit/59e583f7e8530ca92776c866897d895c072e2a82)
+* [youtube] Support shorts URL
+* [zoom] Extract transcripts as subtitles
+* Add field `original_url` with the user-inputted URL
+* Fix and refactor `prepare_outtmpl`
+* Make more fields available for `--print` when used with `--flat-playlist` (see the example below)
+* [utils] Generalize `traverse_dict` to `traverse_obj`
+* [downloader/ffmpeg] Hide FFmpeg banner unless in verbose mode by [fstirlitz](https://github.com/fstirlitz)
+* [build] Release `yt-dlp.tar.gz`
+* [build,update] Add GNU-style SHA512 and prepare updater for similar SHA256 by [nihil-admirari](https://github.com/nihil-admirari)
+* [pyinst] Show Python version in exe metadata by [nihil-admirari](https://github.com/nihil-admirari)
+* [docs] Improve documentation of dependencies
+* [cleanup] Mark unused files
+* [cleanup] Point all shebangs to `python3` by [fstirlitz](https://github.com/fstirlitz)
+* [cleanup] Remove duplicate file `trovolive.py`
+
+
+### 2021.06.01
+
+* Merge youtube-dl: Upto [commit/d495292](https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf)
+* Pre-check archive and filters during playlist extraction
+* Handle Basic Auth `user:pass` in URLs by [hhirtz](https://github.com/hhirtz) and [pukkandan](https://github.com/pukkandan)
+* [archiveorg] Add YoutubeWebArchiveIE by [coletdjnz](https://github.com/coletdjnz) and [alex-gedeon](https://github.com/alex-gedeon)
+* [fancode] Add extractor by [rhsmachine](https://github.com/rhsmachine)
+* [patreon] Support vimeo embeds by [rhsmachine](https://github.com/rhsmachine)
+* [Saitosan] Add new extractor by [llacb47](https://github.com/llacb47)
+* [ShemarooMe] Add extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
+* [telemundo] Add extractor by [king-millez](https://github.com/king-millez)
+* [SonyLIV] Add SonyLIVSeriesIE and subtitle support by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Add HotStarSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [Voot] Add VootSeriesIE by [Ashish0804](https://github.com/Ashish0804)
+* [vidio] Support login and premium videos by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [fragment] When using `-N`, do not keep the fragment content in memory
+* [ffmpeg] Download and merge in a single step if possible
+* [ThumbnailsConvertor] Support conversion to `png` and make it the default by [louie-github](https://github.com/louie-github)
+* [VideoConvertor] Generalize with remuxer and allow conditional recoding
+* [EmbedThumbnail] Embed in `mp4`/`m4a` using mutagen by [tripulse](https://github.com/tripulse) and [pukkandan](https://github.com/pukkandan)
+* [EmbedThumbnail] Embed if any thumbnail was downloaded, not just the best
+* [EmbedThumbnail] Correctly escape filename
+* [update] Replace self without launching a subprocess on Windows
+* [update] Block further update for unsupported systems
+* Refactor `__process_playlist` by creating `LazyList`
+* Write messages to `stderr` when both `quiet` and `verbose`
+* Sanitize and sort playlist thumbnails
+* Remove `None` values from `info.json`
+* [extractor] Always prefer native hls downloader by default
+* [extractor] Skip subtitles without URI in m3u8 manifests by [hheimbuerger](https://github.com/hheimbuerger)
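+
+A sketch of printing per-entry fields without full extraction, tying back to the `--flat-playlist` entry in the 2021.06.08 list above (editorial example: the URL is a placeholder, and which fields are available without extraction varies by extractor):
+
+```shell
+# Print id and title for each playlist entry without extracting every video
+yt-dlp --flat-playlist --print "%(id)s: %(title)s" \
+       "https://www.youtube.com/playlist?list=XXXX"
+```
+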
+* [extractor] Functions to parse `socket.io` response as `json` by [pukkandan](https://github.com/pukkandan) and [llacb47](https://github.com/llacb47) +* [extractor] Allow `note=False` when extracting manifests +* [utils] Escape URLs in `sanitized_Request`, not `sanitize_url` +* [hls] Disable external downloader for `webvtt` +* [youtube] `/live` URLs should raise an error if channel is not live +* [youtube] Bug fixes +* [zee5] Fix m3u8 formats' extension +* [ard] Allow URLs without `-` before id by [olifre](https://github.com/olifre) +* [cleanup] `YoutubeDL._match_entry` +* [cleanup] Refactor updater +* [cleanup] Refactor ffmpeg convertors +* [cleanup] setup.py + + +### 2021.05.20 + +* **Youtube improvements**: + * Support youtube music `MP`, `VL` and `browse` pages + * Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan) + * Extract multiple subtitles in the same language by [pukkandan](https://github.com/pukkandan) and [tpikonen](https://github.com/tpikonen) + * Redirect channels that don't have a `videos` tab to their `UU` playlists + * Support in-channel search + * Sort audio-only formats correctly + * Always extract `maxresdefault` thumbnail + * Extract audio language + * Add subtitle language names by [nixxo](https://github.com/nixxo) and [tpikonen](https://github.com/tpikonen) + * Show alerts only from the final webpage + * Add `html5=1` param to `get_video_info` page requests by [coletdjnz](https://github.com/coletdjnz) + * Better message when login required +* **Add option `--print`**: to print any field/template + * Makes redundant: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url` +* Field `additional_urls` to download additional videos from metadata using [`--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) +* Merge youtube-dl: Upto [commit/dfbbe29](https://github.com/ytdl-org/youtube-dl/commit/dfbbe2902fc67f0f93ee47a8077c148055c67a9b) +* Write thumbnail of playlist and add `pl_thumbnail` outtmpl key +* [embedthumbnail] Add `flac` support and refactor `mutagen` code by [pukkandan](https://github.com/pukkandan) and [tripulse](https://github.com/tripulse) +* [audius:artist] Add extractor by [king-millez](https://github.com/king-millez) +* [parlview] Add extractor by [king-millez](https://github.com/king-millez) +* [tenplay] Fix extractor by [king-millez](https://github.com/king-millez) +* [rmcdecouverte] Generalize `_VALID_URL` +* Add compat-option `no-attach-infojson` +* Add field `name` for subtitles +* Ensure `post_extract` and `pre_process` only run once +* Fix `--check-formats` when there is a network error +* Standardize `write_debug` and `get_param` +* [options] Alias `--write-comments`, `--no-write-comments` +* [options] Refactor callbacks +* [test:download] Only extract enough videos for `playlist_mincount` +* [extractor] Bugfix for when `compat_opts` is not given +* [build] Fix x86 build by [shirt](https://github.com/shirt-dev) +* [cleanup] code formatting, youtube tests and readme + +### 2021.05.11 +* **Deprecate support for Python versions < 3.6** +* **Subtitle extraction from manifests** by [fstirlitz](https://github.com/fstirlitz).
See [be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details +* **Improve output template:** (see the example below) + * Allow slicing lists/strings using `field.start:end:step` + * A field can also be used as an offset, like `field1+num+field2` + * A default value can be given using `field|default` + * Prevent invalid fields from causing errors +* **Merge youtube-dl**: Upto [commit/a726009](https://github.com/ytdl-org/youtube-dl/commit/a7260099873acc6dc7d76cafad2f6b139087afd0) +* **Remove options** `-l`, `-t`, `-A` completely and disable `--auto-number`, `--title`, `--literal`, `--id` +* [Plugins] Prioritize plugins over standard extractors and prevent plugins from overwriting the standard extractor classes +* [downloader] Fix `quiet` and `to_stderr` +* [fragment] Ensure the file is closed on error +* [fragment] Make sure first segment is not skipped +* [aria2c] Fix whitespace being stripped off +* [embedthumbnail] Fix bug where jpeg thumbnails were converted again +* [FormatSort] Fix for when some formats have quality and others don't +* [utils] Add `network_exceptions` +* [utils] Escape URL while sanitizing +* [ukcolumn] Add extractor +* [whowatch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [CBS] Improve `_VALID_URL` to support movies +* [crackle] Improve extraction +* [curiositystream] Fix collections +* [francetvinfo] Improve video id extraction +* [generic] Respect the encoding in manifest +* [limelight] Obey `allow_unplayable_formats` +* [mediasite] Generalize URL pattern by [fstirlitz](https://github.com/fstirlitz) +* [mxplayer] Add MxplayerShowIE by [Ashish0804](https://github.com/Ashish0804) +* [nebula] Move to nebula.app by [Lamieur](https://github.com/Lamieur) +* [niconico] Fix HLS formats by [CXwudi](https://github.com/CXwudi), [tsukumijima](https://github.com/tsukumijima), [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan) +* [niconico] Fix title and thumbnail extraction by [CXwudi](https://github.com/CXwudi) +* [plutotv] Extract subtitles from manifests +* [plutotv] Fix format extraction for some URLs +* [rmcdecouverte] Improve `_VALID_URL` +* [sonyliv] Fix `title` and `series` extraction by [Ashish0804](https://github.com/Ashish0804) +* [tubi] Raise "no video formats" error when video URL is empty +* [youtube:tab] Detect playlists inside community posts +* [youtube] Add `oembed` to reserved names +* [zee5] Fix extraction for some URLs by [Hadi0609](https://github.com/Hadi0609) +* [zee5] Fix py2 compatibility +* Fix `playlist_index` and add `playlist_autonumber`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details
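
A minimal sketch of the output-template improvements listed above (slicing and defaults); the URL is a placeholder, and the slice follows the `field.start:end:step` syntax described in the entry:

```
# First 30 characters of the title, with "Unknown" as fallback for a missing uploader
yt-dlp -o "%(title.:30)s - %(uploader|Unknown)s.%(ext)s" "https://example.com/video"
```
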
+* Add experimental option `--check-formats` to test the URLs before format selection +* Option `--compat-options` to revert [some of yt-dlp's changes](https://github.com/yt-dlp/yt-dlp#differences-in-default-behavior) + * Deprecates `--list-formats-as-table`, `--list-formats-old` +* Fix number of digits in `%(playlist_index)s` +* Fix case sensitivity of format selector +* Revert "[core] be able to hand over id and title using url_result" +* Do not strip out whitespace in `-o` and `-P` +* Fix `preload_download_archive` writing verbose message to `stdout` +* Move option warnings to `YoutubeDL` so that they obey `--no-warnings` and can output colors +* Py2 compatibility for `FileNotFoundError` + + +### 2021.04.22 +* **Improve output template:** + * Objects can be traversed like `%(field.key1.key2)s` + * An offset can be added to numeric fields as `%(field+N)s` + * Deprecates `--autonumber-start` +* **Improve `--sub-langs`:** (see the example below) + * Treat `--sub-langs` entries as regex + * `all` can be used to refer to all the subtitles + * Language codes can be prefixed with `-` to exclude them + * Deprecates `--all-subs` +* Add option `--ignore-no-formats-error` to ignore the "no video format" and similar errors +* Add option `--skip-playlist-after-errors` to skip the rest of a playlist after a given number of errors are encountered +* Merge youtube-dl: Upto [commit/7e8b3f9](https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438) +* [downloader] Fix bug in downloader selection +* [BilibiliChannel] Fix pagination by [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan) +* [rai] Add support for http formats by [nixxo](https://github.com/nixxo) +* [TubiTv] Add TubiTvShowIE by [Ashish0804](https://github.com/Ashish0804) +* [twitcasting] Fix extractor +* [viu:ott] Fix extractor and support series by [lkho](https://github.com/lkho) and [pukkandan](https://github.com/pukkandan) +* [youtube:tab] Show unavailable videos in playlists by [coletdjnz](https://github.com/coletdjnz) +* [youtube:tab] Reload with unavailable videos for all playlists +* [youtube] Ignore invalid stretch ratio +* [youtube] Improve channel syncid extraction to support ytcfg by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Standardize API calls for tabs, mixes and search by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Bugfix in `_extract_ytcfg` +* [mildom:user:vod] Download only necessary amount of pages +* [mildom] Remove proxy completely by [fstirlitz](https://github.com/fstirlitz) +* [go] Fix `_VALID_URL` +* [MetadataFromField] Improve regex and add tests +* [Exec] Ensure backward compatibility when the command contains `%` +* [extractor] Fix inconsistent use of `report_warning` +* Ensure `mergeall` selects best format when multistreams are disabled +* Improve the yt-dlp.sh script by [fstirlitz](https://github.com/fstirlitz) +* [lazy_extractor] Do not load plugins +* [ci] Disable fail-fast +* [docs] Clarify which deprecated options still work +* [docs] Fix typos + + +### 2021.04.11 +* Add option `--convert-thumbnails` (only jpg currently supported) +* Format selector `mergeall` to download and merge all formats +* Pass any field to `--exec` using similar syntax to output template +* Choose downloader for each protocol using `--downloader PROTO:NAME` + * Alias `--downloader` for `--external-downloader` + * Added `native` as an option for the downloader +* Merge youtube-dl: Upto [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
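
A sketch of the improved `--sub-langs` matching from the 2021.04.22 section above (entries are regexes, `all` selects everything, and a `-` prefix excludes); the URL is a placeholder:

```
# Write every subtitle track except live chat
yt-dlp --write-subs --sub-langs "all,-live_chat" "https://example.com/video"
```
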
+* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804) +* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47) +* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger) +* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol) +* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol) +* [youtube] Fix thumbnail URL +* [youtube] Parse API parameters from initial webpage by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Extract comments' approximate timestamp by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix alert extraction +* [bilibili] Fix uploader +* [utils] Add `datetime_from_str` and `datetime_add_months` by [coletdjnz](https://github.com/coletdjnz) +* Run some `postprocessors` before actual download +* Improve argument parsing for `-P`, `-o`, `-S` +* Fix some `m3u8` not obeying `--allow-unplayable-formats` +* Fix default of `dynamic_mpd` +* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg` +* [docs] Improvements + +### 2021.04.03 +* Merge youtube-dl: Upto [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a) +* Ability to set a specific field in the file's metadata using `--parse-metadata` (see the example below) +* Ability to select the nth best format like `-f bv*.2` +* [DiscoveryPlus] Add discoveryplus.in +* [la7] Add podcasts and podcast playlists by [nixxo](https://github.com/nixxo) +* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao) +* [ard:mediathek] Fix video id extraction +* [generic] Detect Invidious' link element +* [youtube] Show premium state in `availability` by [coletdjnz](https://github.com/coletdjnz) +* [viewsource] Add extractor to handle `view-source:` +* [sponskrub] Run before embedding thumbnail +* [docs] Improve `--parse-metadata` documentation + + +### 2021.03.24.1 +* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf) + +### 2021.03.24 +* Merge youtube-dl: Upto 2021.03.25 ([commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)) +* Parse metadata from multiple fields using `--parse-metadata` +* Ability to load playlist infojson using `--load-info-json` +* Write current epoch to infojson when using `--no-clean-infojson` +* [youtube_live_chat] Fix bug when trying to set cookies +* [niconico] Fix for when logged in by [CXwudi](https://github.com/CXwudi) and [xtkoba](https://github.com/xtkoba) +* [linuxacadamy] Fix login + + +### 2021.03.21 +* Merge youtube-dl: Upto [commit/7e79ba7](https://github.com/ytdl-org/youtube-dl/commit/7e79ba7dd6e6649dd2ce3a74004b2044f2182881) +* Option `--no-clean-infojson` to keep private keys in the infojson +* [aria2c] Support retry/abort unavailable fragments by [damianoamatruda](https://github.com/damianoamatruda) +* [aria2c] Better default arguments +* [movefiles] Fix bugs and make more robust +* [formatSort] Fix `quality` being ignored +* [splitchapters] Fix for older ffmpeg +* [sponskrub] Pass proxy to sponskrub +* Make sure `post_hook` gets the final filename +* Recursively remove any private keys from infojson +* Embed video URL metadata inside `mp4` by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
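
A sketch of the `--parse-metadata` usage described in the sections above; the field pattern and URL are illustrative:

```
# Interpret "Artist - Title" style titles and populate the artist field from them
yt-dlp --parse-metadata "title:%(artist)s - %(title)s" "https://example.com/video"
```
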
+* Merge `webm` formats into `mkv` if thumbnails are to be embedded by [damianoamatruda](https://github.com/damianoamatruda) +* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda) +* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda) +* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan) +* [docs] Add deprecated options and aliases in readme +* [docs] Fix some minor mistakes + +* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work) +* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984) +* [bilibili] Add anthology support by [animelover1984](https://github.com/animelover1984) +* [amcnetworks] Fix extractor by [2ShedsJackson](https://github.com/2ShedsJackson) +* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo) +* [rcs] Improved extraction by [nixxo](https://github.com/nixxo) +* [linuxacadamy] Improve regex +* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan) +* [youtube] Bugfix for channel playlist extraction +* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson) + + +### 2021.03.15 +* **Split video by chapters**: using option `--split-chapters` (see the example below) + * The output file of the split files can be set with `-o`/`-P` using the prefix `chapter:` + * Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template +* **Parallel fragment downloads** by [shirt](https://github.com/shirt-dev) + * Use option `--concurrent-fragments` (`-N`) to set the number of threads (default 1) +* Merge youtube-dl: Upto [commit/3be0980](https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea) +* [zee5] Add Show Extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan) +* [rai] Fix DRM check by [nixxo](https://github.com/nixxo) +* [wimtv] Add extractor by [nixxo](https://github.com/nixxo) +* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo) +* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7) +* [youtube] Rewrite comment extraction by [coletdjnz](https://github.com/coletdjnz) +* [embedthumbnail] Set mtime correctly +* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev) + + +### 2021.03.07 +* [youtube] Fix history, mixes, community pages and trending by [pukkandan](https://github.com/pukkandan) and [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix private feeds/playlists on multi-channel accounts by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Extract alerts from continuation by [coletdjnz](https://github.com/coletdjnz) +* [cbs] Add support for ParamountPlus by [shirt](https://github.com/shirt-dev) +* [mxplayer] Rewrite extractor with show support by [pukkandan](https://github.com/pukkandan) and [Ashish0804](https://github.com/Ashish0804) +* [gedi] Improvements from youtube-dl by [nixxo](https://github.com/nixxo) +* [vimeo] Fix videos with password by [teesid](https://github.com/teesid)
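
A sketch of the chapter-splitting workflow from the 2021.03.15 section above, using the `chapter:` output-template prefix and the `section_*` keys it documents; the URL is a placeholder:

```
# Split the downloaded video along its chapters, one file per chapter
yt-dlp --split-chapters -o "chapter:%(title)s/%(section_number)02d - %(section_title)s.%(ext)s" "https://example.com/video"
```
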
+* [lbry] Support `lbry://` URLs by [nixxo](https://github.com/nixxo) +* [bilibili] Change `Accept` header by [pukkandan](https://github.com/pukkandan) and [animelover1984](https://github.com/animelover1984) +* [trovo] Pass origin header +* [rai] Check for DRM by [nixxo](https://github.com/nixxo) +* [downloader] Fix bug for `ffmpeg`/`httpie` +* [update] Fix updater removing the executable bit on some UNIX distros +* [update] Fix current build hash for UNIX +* [docs] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804) +* Fix some videos downloading with `m3u8` extension +* Remove "fixup is ignored" warning when fixup wasn't passed by user + + +### 2021.03.03.2 +* [build] Fix bug + +### 2021.03.03 +* [youtube] Use new browse API for continuation page extraction by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan) +* Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev) +* Merge youtube-dl: Upto [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03) +* [mtv] Fix extractor +* [nick] Fix extractor by [DennyDai](https://github.com/DennyDai) +* [mxplayer] Add new extractor by [codeasashu](https://github.com/codeasashu) +* [youtube] Throw error when `--extractor-retries` are exhausted +* Reduce default of `--extractor-retries` to 3 +* Fix packaging bugs by [hseg](https://github.com/hseg) + + +### 2021.03.01 +* Allow specifying path in `--external-downloader` +* Add option `--sleep-requests` to sleep between requests +* Add option `--extractor-retries` to retry on known extractor errors +* Extract comments only when needed +* `--get-comments` doesn't imply `--write-info-json` if `-J`, `-j` or `--print-json` are used +* Fix `get_executable_path` by [shirt](https://github.com/shirt-dev) +* [youtube] Retry on more known errors than just HTTP-5xx +* [youtube] Fix inconsistent `webpage_url` +* [tennistv] Fix format sorting +* [bilibiliaudio] Recognize the file as audio-only +* [hrfensehen] Fix wrong import +* [viki] Fix viki play pass authentication by [RobinD42](https://github.com/RobinD42) +* [readthedocs] Improvements by [shirt](https://github.com/shirt-dev) +* [hls] Fix bug with m3u8 format extraction +* [hls] Enable `--hls-use-mpegts` by default when downloading live-streams +* [embedthumbnail] Fix bug with deleting original thumbnail +* [build] Fix completion paths, zsh pip completion install by [hseg](https://github.com/hseg) +* [ci] Disable download tests unless specifically invoked +* Cleanup some code and fix typos + + +### 2021.02.24 +* Moved project to an organization [yt-dlp](https://github.com/yt-dlp) +* **Completely changed project name to yt-dlp** by [Pccode66](https://github.com/Pccode66) and [pukkandan](https://github.com/pukkandan) + * Also, `youtube-dlc` config files are no longer loaded +* Merge youtube-dl: Upto [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi) +* [Readthedocs](https://yt-dlp.readthedocs.io) support by [shirt](https://github.com/shirt-dev) +* [youtube] Show if video was a live stream in info (`was_live`) +* [Zee5] Add new extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan) +* [jwplatform] Add support for `hyland.com` +* [tennistv] Fix extractor +* [hls] Support media initialization by [shirt](https://github.com/shirt-dev) +* [hls] Added option `--hls-split-discontinuity` to better support
media discontinuity by [shirt](https://github.com/shirt-dev) +* [ffmpeg] Allow passing custom arguments before -i using `--ppa "ffmpeg_i1:ARGS"` syntax +* Fix `--windows-filenames` removing `/` from UNIX paths +* [hls] Show warning if pycryptodome is not found +* [docs] Improvements + * Fix documentation of `Extractor Options` + * Document `all` in format selection + * Document `playable_in_embed` in output templates + + +### 2021.02.19 +* Merge youtube-dl: Upto [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao) +* [viki] Fix extractor +* [niconico] Extract `channel` and `channel_id` by [kurumigi](https://github.com/kurumigi) +* [youtube] Multiple page support for hashtag URLs +* [youtube] Add more invidious instances +* [youtube] Fix comment extraction when comment text is empty +* Option `--windows-filenames` to force use of Windows-compatible filenames +* [ExtractAudio] Bugfix +* Don't raise `parser.error` when exiting for update +* [MoveFiles] Fix for when merger can't run +* Changed `--trim-file-name` to `--trim-filenames` to be similar to related options +* Format Sort improvements: + * Prefer `vp9.2` over other `vp9` codecs + * Remove forced priority of `quality` + * Remove unnecessary `field_preference` and misuse of `preference` from extractors +* Build improvements: + * Fix hash output by [shirt](https://github.com/shirt-dev) + * Lock Python package versions for x86 and use `wheels` by [shirt](https://github.com/shirt-dev) + * Exclude `vcruntime140.dll` from UPX by [jbruchon](https://github.com/jbruchon) + * Set version number based on UTC time, not local time + * Publish on PyPI only if token is set +* [docs] Better document `--prefer-free-formats` and add `--no-prefer-free-format` + + +### 2021.02.15 +* Merge youtube-dl: Upto [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org) +* [niconico] Improved extraction and support for encrypted/SMILE movies by [kurumigi](https://github.com/kurumigi), [tsukumijima](https://github.com/tsukumijima), [bbepis](https://github.com/bbepis), [pukkandan](https://github.com/pukkandan) +* Fix HLS AES-128 with multiple keys in external downloaders by [shirt](https://github.com/shirt-dev) +* [youtube_live_chat] Fix by using POST API by [siikamiika](https://github.com/siikamiika) +* [rumble] Add support for video page +* Option `--allow-unplayable-formats` to allow downloading unplayable video formats +* [ExtractAudio] Don't re-encode when file is already in a common audio format +* [youtube] Fix search continuations +* [youtube] Fix for new accounts +* Improve build/updater: by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev) + * Fix SHA256 calculation in build and implement hash checking for updater + * Exit immediately in Windows once the update process starts + * Fix updater for `x86.exe` + * Updater looks for both `yt-dlp` and `youtube-dlc` in releases for future-proofing + * Change optional dependency to `pycryptodome` +* Fix issue with unicode filenames in aria2c by [shirt](https://github.com/shirt-dev) +* Fix `allow_playlist_files` not being correctly passed through +* Fix for empty HTTP HEAD requests by [shirt](https://github.com/shirt-dev) +* Fix `get_executable_path` in UNIX +* [sponskrub] Print ffmpeg output and errors to terminal +* `__real_download` should be false when ffmpeg unavailable and no download +* Show `exe`/`zip`/`source` and 32/64bit in verbose message + + +### 2021.02.09
+* **aria2c support for DASH/HLS**: by [shirt](https://github.com/shirt-dev) +* **Implement Updater** (`-U`) by [shirt](https://github.com/shirt-dev) +* [youtube] Fix comment extraction +* [youtube_live_chat] Improve extraction +* [youtube] Fix for channel URLs sometimes not downloading all pages +* [aria2c] Changed default arguments to `--console-log-level=warn --summary-interval=0 --file-allocation=none -x16 -j16 -s16` +* Add fallback for thumbnails +* [embedthumbnail] Keep original thumbnail after conversion if write_thumbnail given +* [embedsubtitle] Keep original subtitle after conversion if write_subtitles given +* [pyinst.py] Move back to root dir +* [youtube] Simplified renderer parsing and bugfixes +* [movefiles] Fix compatibility with python2 +* [remuxvideo] Fix validation of conditional remux +* [sponskrub] Don't raise error when the video does not exist +* [docs] Crypto is an optional dependency + + +### 2021.02.04 +* Merge youtube-dl: Upto [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1) +* **Date/time formatting in output template:** (see the example below) + * You can use [`strftime`](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) to format date/time fields. Example: `%(upload_date>%Y-%m-%d)s` +* **Multiple output templates:** + * Separate output templates can be given for the different metadata files by using `-o TYPE:TEMPLATE` + * The allowed types are: `subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson` +* [youtube] More metadata extraction for channel/playlist URLs (channel, uploader, thumbnail, tags) +* New option `--no-write-playlist-metafiles` to prevent writing playlist metadata files +* [audius] Fix extractor +* [youtube_live_chat] Fix `parse_yt_initial_data` and add `fragment_retries` +* [postprocessor] Raise errors correctly +* [metadatafromtitle] Fix bug when extracting data from numeric fields +* Fix issue with overwriting files +* Fix "Default format spec" appearing in quiet mode +* [FormatSort] Allow user to prefer av01 over vp9 (The default is still vp9) +* [FormatSort] Fix bug where `quality` had higher priority than `hasvid` +* [pyinst] Automatically detect Python architecture and working directory +* Strip out internal fields such as `_filename` from infojson + + +### 2021.01.29 +* **Features from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl)**: by [animelover1984](https://github.com/animelover1984) and [bbepis](https://github.com/bbepis) + * Add `--get-comments` + * [youtube] Extract comments + * [billibilli] Added BiliBiliSearchIE, BilibiliChannelIE + * [billibilli] Extract comments + * [billibilli] Better video extraction + * Write playlist data to infojson + * [FFmpegMetadata] Embed infojson inside the video + * [EmbedThumbnail] Try embedding in mp4 using ffprobe and `-disposition` + * [EmbedThumbnail] Treat mka like mkv and mov like mp4 + * [EmbedThumbnail] Embed in ogg/opus + * [VideoRemuxer] Conditionally remux video + * [VideoRemuxer] Add `-movflags +faststart` when remuxing to mp4 + * [ffmpeg] Print entire stderr in verbose when there is an error + * [EmbedSubtitle] Warn when embedding ass in mp4 + * [anvato] Use NFLTokenGenerator if possible +* **Parse additional metadata**: New option `--parse-metadata` to extract additional metadata from existing fields + * The extracted fields can be used in `--output` + * Deprecated `--metadata-from-title` +* [Audius] Add extractor +* [youtube] Extract playlist description and write it to `.description` file +* Detect existing files even when using `recode`/`remux` (`extract-audio` is partially fixed)
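
A sketch combining the date/time formatting and per-type output templates introduced in the 2021.02.04 section above; the URL and the `meta/` directory are placeholders:

```
# Prefix filenames with an ISO-style date and write the infojson under meta/
yt-dlp -o "%(upload_date>%Y-%m-%d)s %(title)s.%(ext)s" -o "infojson:meta/%(id)s.%(ext)s" "https://example.com/video"
```
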
+* Fix wrong user config from v2021.01.24 +* [youtube] Report error message from youtube as error instead of warning +* [FormatSort] Fix some fields not sorting from v2021.01.24 +* [postprocessor] Deprecate `avconv`/`avprobe`. All current functionality is left untouched. But don't expect any new features to work with avconv +* [postprocessor] Fix `write_debug` to not throw an error when there is no `_downloader` +* [movefiles] Don't give "cant find" warning when move is unnecessary +* Refactor `update-version`, `pyinst.py` and related files +* [ffmpeg] Document more formats that are supported for remux/recode + + +### 2021.01.24 +* Merge youtube-dl: Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16) +* Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins)) +* **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files (see the example below) + * The syntax is `-P "type:path" -P "type:path"` + * Valid types are: home, temp, description, annotation, subtitle, infojson, thumbnail + * Additionally, the configuration file is taken from the home directory or current directory +* Allow passing different arguments to different external downloaders +* [mildom] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) +* Warn when using old style `--external-downloader-args` and `--post-processor-args` +* Fix `--no-overwrite` when using `--write-link` +* [sponskrub] Output `unrecognized argument` error message correctly +* [cbs] Make failure to extract title non-fatal +* Fix typecasting when pre-checking archive +* Fix issue with setting title on UNIX +* Deprecate redundant aliases in `formatSort`. The aliases remain functional for backward compatibility, but will be left undocumented +* [tests] Fix test_post_hooks +* [tests] Split core and download tests + + +### 2021.01.20 +* [TrovoLive] Add extractor (only VODs) +* [pokemon] Add `/#/player` URLs +* Improved parsing of multiple postprocessor-args, add `--ppa` as alias +* [EmbedThumbnail] Simplify embedding in mkv +* [sponskrub] Encode filenames correctly, better debug output and error message +* [readme] Cleanup options + + +### 2021.01.16 +* Merge youtube-dl: Upto [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16) +* **Configuration files:** + * Portable configuration file: `./yt-dlp.conf` + * Allow the configuration files to be named `yt-dlp` instead of `youtube-dlc`. See [this](https://github.com/yt-dlp/yt-dlp#configuration) for details
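
A sketch of the multiple-paths option from the 2021.01.24 section above, keeping temporary files apart from the final destination; the directories are placeholders:

```
# Final files go under ~/Videos; intermediate files under /tmp/yt-dlp
yt-dlp -P "home:~/Videos" -P "temp:/tmp/yt-dlp" "https://example.com/video"
```
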
+* Add PyPI release + + +### 2021.01.14 +* Added option `--break-on-reject` +* [roosterteeth.com] Fix for bonus episodes by [Zocker1999NET](https://github.com/Zocker1999NET) +* [tiktok] Fix for when share_info is empty +* [EmbedThumbnail] Fix bug due to incorrect function name +* [docs] Changed sponskrub links to point to [yt-dlp/SponSkrub](https://github.com/yt-dlp/SponSkrub) since I am now providing both linux and windows releases +* [docs] Change all links to correctly point to new fork URL +* [docs] Fix typos + + +### 2021.01.12 +* [roosterteeth.com] Add subtitle support by [samiksome](https://github.com/samiksome) +* Added `--force-overwrites`, `--no-force-overwrites` by [alxnull](https://github.com/alxnull) +* Changed fork name to `yt-dlp` +* Fix typos by [FelixFrog](https://github.com/FelixFrog) +* [ci] Option to skip +* [changelog] Added unreleased changes in blackjack4494/yt-dlc + + +### 2021.01.10 +* [archive.org] Fix extractor and add support for audio and playlists by [wporr](https://github.com/wporr) +* [Animelab] Added by [mariuszskon](https://github.com/mariuszskon) +* [youtube:search] Fix view_count by [ohnonot](https://github.com/ohnonot) +* [youtube] Show if video is embeddable in info (`playable_in_embed`) +* Update version badge automatically in README +* Enable `test_youtube_search_matching` +* Create `to_screen` and similar functions in postprocessor/common + + +### 2021.01.09 +* [youtube] Fix bug in automatic caption extraction +* Add `post_hooks` to YoutubeDL by [alexmerkel](https://github.com/alexmerkel) +* Batch file enumeration improvements by [glenn-slayden](https://github.com/glenn-slayden) +* Stop immediately when reaching `--max-downloads` by [glenn-slayden](https://github.com/glenn-slayden) +* Fix incorrect ANSI sequence for restoring console-window title by [glenn-slayden](https://github.com/glenn-slayden) +* Kill child processes when yt-dlc is killed by [Unrud](https://github.com/Unrud) + + +### 2021.01.08 +* Merge youtube-dl: Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f)) +* Moved changelog to separate file + + +### 2021.01.07-1 +* [Akamai] fix by [nixxo](https://github.com/nixxo) +* [Tiktok] merge youtube-dl tiktok extractor by [GreyAlien502](https://github.com/GreyAlien502) +* [vlive] add support for playlists by [kyuyeunk](https://github.com/kyuyeunk) +* [youtube_live_chat] make sure playerOffsetMs is positive by [siikamiika](https://github.com/siikamiika) +* Ignore extra data streams in ffmpeg by [jbruchon](https://github.com/jbruchon) +* Allow passing different arguments to different postprocessors using `--postprocessor-args` +* Deprecated `--sponskrub-args`.
The same can now be done using `--postprocessor-args "sponskrub:<args>"` +* [CI] Split tests into core-test and full-test + + +### 2021.01.07 +* Removed priority of `av01` codec in `-S` since most devices don't support it yet +* Added `duration_string` to be used in `--output` +* Created First Release + + +### 2021.01.05-1 +* **Changed defaults:** + * Enabled `--ignore` + * Disabled `--video-multistreams` and `--audio-multistreams` + * Changed default format selection to `bv*+ba/b` when `--audio-multistreams` is disabled + * Changed default format sort order to `res,fps,codec,size,br,asr,proto,ext,has_audio,source,format_id` + * Changed `webm` to be preferable to `flv` in format sorting + * Changed default output template to `%(title)s [%(id)s].%(ext)s` + * Enabled `--list-formats-as-table` + + +### 2021.01.05 +* **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details and the example below +* **Format Selection:** See [Format Selection](README.md#format-selection) for details + * New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*` + * Changed video format sorting to show video only files and video+audio files together + * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams` + * Added `b`,`w`,`v`,`a` as aliases for `best`, `worst`, `video` and `audio` respectively +* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details +* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-sponsorblock-options) for details +* Added `--force-download-archive` (`--force-write-archive`) by [h-h-h-h](https://github.com/h-h-h-h) +* Added `--list-formats-as-table`, `--list-formats-old` +* **Negative Options:** Makes it possible to negate most boolean options by adding a `no-` to the switch. Useful when you want to reverse an option that is defined in a config file + * Added `--no-ignore-dynamic-mpd`, `--no-allow-dynamic-mpd`, `--allow-dynamic-mpd`, `--youtube-include-hls-manifest`, `--no-youtube-include-hls-manifest`, `--no-youtube-skip-hls-manifest`, `--no-download`, `--no-download-archive`, `--resize-buffer`, `--part`, `--mtime`, `--no-keep-fragments`, `--no-cookies`, `--no-write-annotations`, `--no-write-info-json`, `--no-write-description`, `--no-write-thumbnail`, `--youtube-include-dash-manifest`, `--post-overwrites`, `--no-keep-video`, `--no-embed-subs`, `--no-embed-thumbnail`, `--no-add-metadata`, `--no-include-ads`, `--no-write-sub`, `--no-write-auto-sub`, `--no-playlist-reverse`, `--no-restrict-filenames`, `--youtube-include-dash-manifest`, `--no-format-sort-force`, `--flat-videos`, `--no-list-formats-as-table`, `--no-sponskrub`, `--no-sponskrub-cut`, `--no-sponskrub-force` + * Renamed: `--write-subs`, `--no-write-subs`, `--no-write-auto-subs`, `--write-auto-subs`. Note that these can still be used without the ending "s" +* Relaxed validation for format filters so that any arbitrary field can be used +* Fix for embedding thumbnail in mp3 by [pauldubois98](https://github.com/pauldubois98) ([ytdl-org/youtube-dl#21569](https://github.com/ytdl-org/youtube-dl/pull/21569)) +* Make Twitch Video ID output from Playlist and VOD extractor same. This is only a temporary fix
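
A sketch of the `-S`/`-f` combination these Format Sort and Format Selection entries describe; the sort keys are taken from the default order listed above, and the URL is a placeholder:

```
# Prefer higher resolution, then fps, then codec; merge best video with best audio
yt-dlp -S "res,fps,codec" -f "bv*+ba/b" "https://example.com/video"
```
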
+* Merge youtube-dl: Upto [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details + * Extractors [tiktok](https://github.com/ytdl-org/youtube-dl/commit/fb626c05867deab04425bad0c0b16b55473841a2) and [hotstar](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc) have not been merged +* Cleaned up the fork for public use + + +**Note**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan) + +### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) +* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan) +* Youtube improvements by [pukkandan](https://github.com/pukkandan) + * Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL + * Fix some improper Youtube URLs + * Redirect channel home to /video + * Print youtube's warning message + * Handle Multiple pages for feeds better +* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494) +* Add --break-on-existing by [gergesh](https://github.com/gergesh) +* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan) +* [bitwave.tv] New extractor by [lorpus](https://github.com/lorpus) +* [Gedi] Add extractor by [nixxo](https://github.com/nixxo) +* [Rcs] Add new extractor by [nixxo](https://github.com/nixxo) +* [skyit] New skyitalia extractor by [nixxo](https://github.com/nixxo) +* [france.tv] Fix thumbnail URL by [renalid](https://github.com/renalid) +* [ina] support mobile links by [B0pol](https://github.com/B0pol) +* [instagram] Fix thumbnail extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [SouthparkDe] Support for English URLs by [xypwn](https://github.com/xypwn) +* [spreaker] fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan) +* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk) +* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv) +* [ITV] BTCC URL update by [WolfganP](https://github.com/WolfganP) +* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan) +* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv) +* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan) +* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan) +* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon) + + +### Changelog of [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) till release 2020.11.11-3 + +**Note**: This was constructed from the merge commit messages and may not be entirely accurate + +* [bandcamp] fix failing test.
remove subclass hack by [insaneracist](https://github.com/insaneracist) +* [bandcamp] restore album downloads by [insaneracist](https://github.com/insaneracist) +* [francetv] fix extractor by [Surkal](https://github.com/Surkal) +* [gdcvault] fix extractor by [blackjack4494](https://github.com/blackjack4494) +* [hotstar] Move to API v1 by [theincognito-inc](https://github.com/theincognito-inc) +* [hrfernsehen] add extractor by [blocktrron](https://github.com/blocktrron) +* [kakao] new apis by [blackjack4494](https://github.com/blackjack4494) +* [la7] fix missing protocol by [nixxo](https://github.com/nixxo) +* [mailru] removed escaped braces, use urljoin, added tests by [nixxo](https://github.com/nixxo) +* [MTV/Nick] universal mgid extractor + fix nick.de feed by [blackjack4494](https://github.com/blackjack4494) +* [mtv] Fix a missing match_id by [nixxo](https://github.com/nixxo) +* [Mtv] updated extractor logic & more by [blackjack4494](https://github.com/blackjack4494) +* [ndr] support Daserste ndr by [blackjack4494](https://github.com/blackjack4494) +* [Netzkino] Only use video id to find metadata by [TobiX](https://github.com/TobiX) +* [newgrounds] fix: video download by [insaneracist](https://github.com/insaneracist) +* [nitter] Add new extractor by [B0pol](https://github.com/B0pol) +* [soundcloud] Resolve audio/x-wav by [tfvlrue](https://github.com/tfvlrue) +* [soundcloud] sets pattern and tests by [blackjack4494](https://github.com/blackjack4494) +* [SouthparkDE/MTV] another mgid extraction (mtv_base) feed url updated by [blackjack4494](https://github.com/blackjack4494) +* [StoryFire] Add new extractor by [sgstair](https://github.com/sgstair) +* [twitch] by [geauxlo](https://github.com/geauxlo) +* [videa] Adapt to updates by [adrianheine](https://github.com/adrianheine) +* [Viki] subtitles, formats by [blackjack4494](https://github.com/blackjack4494) +* [vlive] fix extractor for revamped website by [exwm](https://github.com/exwm) +* [xtube] fix extractor by [insaneracist](https://github.com/insaneracist) +* [youtube] Convert subs when download is skipped by [blackjack4494](https://github.com/blackjack4494) +* [youtube] Fix age gate detection by [random-nick](https://github.com/random-nick) +* [youtube] fix yt-only playback when age restricted/gated - requires cookies by [blackjack4494](https://github.com/blackjack4494) +* [youtube] fix: extract artist metadata from ytInitialData by [insaneracist](https://github.com/insaneracist) +* [youtube] fix: extract mix playlist ids from ytInitialData by [insaneracist](https://github.com/insaneracist) +* [youtube] fix: mix playlist title by [insaneracist](https://github.com/insaneracist) +* [youtube] fix: Youtube Music playlists by [insaneracist](https://github.com/insaneracist) +* [Youtube] Fixed problem with new youtube player by [peet1993](https://github.com/peet1993) +* [zoom] Fix url parsing for url's containing /share/ and dots by [Romern](https://github.com/Romern) +* [zoom] new extractor by [insaneracist](https://github.com/insaneracist) +* abc by [adrianheine](https://github.com/adrianheine) +* Added Comcast_SSO fix by [merval](https://github.com/merval) +* Added DRM logic to brightcove by [merval](https://github.com/merval) +* Added regex for ABC.com site. 
by [kucksdorfs](https://github.com/kucksdorfs) +* alura by [hugohaa](https://github.com/hugohaa) +* Arbitrary merges by [fstirlitz](https://github.com/fstirlitz) +* ard.py_add_playlist_support by [martin54](https://github.com/martin54) +* Bugfix/youtube/chapters fix extractor by [gschizas](https://github.com/gschizas) +* bugfix_youtube_like_extraction by [RedpointsBots](https://github.com/RedpointsBots) +* Create build workflow by [blackjack4494](https://github.com/blackjack4494) +* deezer by [LucBerge](https://github.com/LucBerge) +* Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan) +* Don't install tests by [l29ah](https://github.com/l29ah) +* Don't try to embed/convert json subtitles generated by [youtube](https://github.com/youtube) livechat by [pukkandan](https://github.com/pukkandan) +* Doodstream by [sxvghd](https://github.com/sxvghd) +* duboku by [lkho](https://github.com/lkho) +* elonet by [tpikonen](https://github.com/tpikonen) +* ext/remuxe-video by [Zocker1999NET](https://github.com/Zocker1999NET) +* fall-back to the old way to fetch subtitles, if needed by [RobinD42](https://github.com/RobinD42) +* feature_subscriber_count by [RedpointsBots](https://github.com/RedpointsBots) +* Fix external downloader when there is no http_header by [pukkandan](https://github.com/pukkandan) +* Fix issue triggered by [tubeup](https://github.com/tubeup) by [nsapa](https://github.com/nsapa) +* Fix YoutubePlaylistsIE by [ZenulAbidin](https://github.com/ZenulAbidin) +* fix-mitele' by [DjMoren](https://github.com/DjMoren) +* fix/google-drive-cookie-issue by [legraphista](https://github.com/legraphista) +* fix_tiktok by [mervel-mervel](https://github.com/mervel-mervel) +* Fixed problem with JS player URL by [peet1993](https://github.com/peet1993) +* fixYTSearch by [xarantolus](https://github.com/xarantolus) +* FliegendeWurst-3sat-zdf-merger-bugfix-feature +* gilou-bandcamp_update +* implement ThisVid extractor by [rigstot](https://github.com/rigstot) +* JensTimmerman-patch-1 by [JensTimmerman](https://github.com/JensTimmerman) +* Keep download archive in memory for better performance by [jbruchon](https://github.com/jbruchon) +* la7-fix by [iamleot](https://github.com/iamleot) +* magenta by [adrianheine](https://github.com/adrianheine) +* Merge 26564 from [adrianheine](https://github.com/adrianheine) +* Merge code from [ddland](https://github.com/ddland) +* Merge code from [nixxo](https://github.com/nixxo) +* Merge code from [ssaqua](https://github.com/ssaqua) +* Merge code from [zubearc](https://github.com/zubearc) +* mkvthumbnail by [MrDoritos](https://github.com/MrDoritos) +* myvideo_ge by [fonkap](https://github.com/fonkap) +* naver by [SeonjaeHyeon](https://github.com/SeonjaeHyeon) +* ondemandkorea by [julien-hadleyjack](https://github.com/julien-hadleyjack) +* rai-update by [iamleot](https://github.com/iamleot) +* RFC: youtube: Polymer UI and JSON endpoints for playlists by [wlritchi](https://github.com/wlritchi) +* rutv by [adrianheine](https://github.com/adrianheine) +* Sc extractor web auth by [blackjack4494](https://github.com/blackjack4494) +* Switch from binary search tree to Python sets by [jbruchon](https://github.com/jbruchon) +* tiktok by [skyme5](https://github.com/skyme5) +* tvnow by [TinyToweringTree](https://github.com/TinyToweringTree) +* twitch-fix by [lel-amri](https://github.com/lel-amri) +* Twitter shortener by [blackjack4494](https://github.com/blackjack4494) +* Update README.md by [JensTimmerman](https://github.com/JensTimmerman) +* Update to 
reflect website changes. by [amigatomte](https://github.com/amigatomte) +* use webarchive to fix a dead link in README by [B0pol](https://github.com/B0pol) +* Viki the second by [blackjack4494](https://github.com/blackjack4494) +* wdr-subtitles by [mrtnmtth](https://github.com/mrtnmtth) +* Webpfix by [alexmerkel](https://github.com/alexmerkel) +* Youtube live chat by [siikamiika](https://github.com/siikamiika) diff --git a/Collaborators.md b/Collaborators.md new file mode 100644 index 0000000..894a853 --- /dev/null +++ b/Collaborators.md @@ -0,0 +1,63 @@ +# Collaborators + +This is a list of the collaborators of the project and their major contributions. See the [Changelog](Changelog.md) for more details. + +You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [authors of youtube-dl](https://github.com/ytdl-org/youtube-dl/blob/master/AUTHORS) + + +## [pukkandan](https://github.com/pukkandan) + +[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/pukkandan) +[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/pukkandan) + +* Owner of the fork + + + +## [shirt](https://github.com/shirt-dev) + +[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/shirt) + +* Multithreading (`-N`) and aria2c support for fragment downloads +* Support for media initialization and discontinuity in HLS +* The self-updater (`-U`) + + + +## [coletdjnz](https://github.com/coletdjnz) + +[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz) + +* Improved plugin architecture +* Rewrote the networking infrastructure, implemented support for `requests` +* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements +* Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc +* Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc + + + +## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub> + +[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/ashish0804) + +* Added support for new websites BiliIntl, DiscoveryPlusIndia, OlympicsReplay, PlanetMarathi, ShemarooMe, Utreon, Zee5 etc +* Added playlist/series downloads for Hotstar, ParamountPlus, Rumble, SonyLIV, Trovo, TubiTv, Voot etc +* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc + + +## [bashonly](https://github.com/bashonly) + +* `--update-to`, self-updater rewrite, automated/nightly/master releases +* `--cookies-from-browser` support for Firefox containers, external downloader cookie handling overhaul +* Added support for new websites like Dacast, Kick, NBCStations, Triller, VideoKen, Weverse, WrestleUniverse etc +* Improved/fixed support for Anvato, Brightcove, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc + + +## [Grub4K](https://github.com/Grub4K) + +[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) 
[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K) + +* `--update-to`, self-updater rewrite, automated/nightly/master releases +* Reworked internals like `traverse_obj`, various core refactors and bug fixes +* Implemented proper progress reporting for parallel downloads +* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..68a49da --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9344003 --- /dev/null +++ b/Makefile @@ -0,0 +1,160 @@ +all: lazy-extractors yt-dlp doc pypi-files +clean: clean-test clean-dist +clean-all: clean clean-cache +completions: completion-bash completion-fish completion-zsh +doc: README.md CONTRIBUTING.md issuetemplates supportedsites +ot: offlinetest +tar: yt-dlp.tar.gz + +# Keep this list in sync with pyproject.toml includes/artifacts +# intended use: when building a source distribution, +# make pypi-files && python3 -m build -sn . +pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ + completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/* + +.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites + +clean-test: + rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ + *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ + *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \ + *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp +clean-dist: + rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ + yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS +clean-cache: + find . 
\( \ + -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ + \) -prune -exec rm -rf {} \; + +completion-bash: completions/bash/yt-dlp +completion-fish: completions/fish/yt-dlp.fish +completion-zsh: completions/zsh/_yt-dlp +lazy-extractors: yt_dlp/extractor/lazy_extractors.py + +PREFIX ?= /usr/local +BINDIR ?= $(PREFIX)/bin +MANDIR ?= $(PREFIX)/man +SHAREDIR ?= $(PREFIX)/share +PYTHON ?= /usr/bin/env python3 +GNUTAR ?= tar + +# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2 +PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1 +PANDOC_VERSION != $(PANDOC_VERSION_CMD) +PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD)) +MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi +MARKDOWN != $(MARKDOWN_CMD) +MARKDOWN ?= $(shell $(MARKDOWN_CMD)) + +install: lazy-extractors yt-dlp yt-dlp.1 completions + mkdir -p $(DESTDIR)$(BINDIR) + install -m755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp + mkdir -p $(DESTDIR)$(MANDIR)/man1 + install -m644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1 + mkdir -p $(DESTDIR)$(SHAREDIR)/bash-completion/completions + install -m644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp + mkdir -p $(DESTDIR)$(SHAREDIR)/zsh/site-functions + install -m644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp + mkdir -p $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d + install -m644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish + +uninstall: + rm -f $(DESTDIR)$(BINDIR)/yt-dlp + rm -f $(DESTDIR)$(MANDIR)/man1/yt-dlp.1 + rm -f $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp + rm -f $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp + rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish + +codetest: + flake8 . 
+ +test: + $(PYTHON) -m pytest + $(MAKE) codetest + +offlinetest: codetest + $(PYTHON) -m pytest -k "not download" + +CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort +CODE_FOLDERS != $(CODE_FOLDERS_CMD) +CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD)) +CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done +CODE_FILES != $(CODE_FILES_CMD) +CODE_FILES ?= $(shell $(CODE_FILES_CMD)) +yt-dlp: $(CODE_FILES) + mkdir -p zip + for d in $(CODE_FOLDERS) ; do \ + mkdir -p zip/$$d ;\ + cp -pPR $$d/*.py zip/$$d/ ;\ + done + (cd zip && touch -t 200001010101 $(CODE_FILES)) + mv zip/yt_dlp/__main__.py zip/ + (cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py) + rm -rf zip + echo '#!$(PYTHON)' > yt-dlp + cat yt-dlp.zip >> yt-dlp + rm yt-dlp.zip + chmod a+x yt-dlp + +README.md: $(CODE_FILES) devscripts/make_readme.py + COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py + +CONTRIBUTING.md: README.md devscripts/make_contributing.py + $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md + +issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml yt_dlp/version.py + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE/1_broken_site.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE/2_site_support_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE/3_site_feature_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE/4_bug_report.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml .github/ISSUE_TEMPLATE/5_feature_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/6_question.yml .github/ISSUE_TEMPLATE/6_question.yml + +supportedsites: + $(PYTHON) devscripts/make_supportedsites.py supportedsites.md + +README.txt: README.md + pandoc -f $(MARKDOWN) -t plain README.md -o README.txt + +yt-dlp.1: README.md devscripts/prepare_manpage.py + $(PYTHON) devscripts/prepare_manpage.py yt-dlp.1.temp.md + pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1 + rm -f yt-dlp.1.temp.md + +completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in + mkdir -p completions/bash + $(PYTHON) devscripts/bash-completion.py + +completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in + mkdir -p completions/zsh + $(PYTHON) devscripts/zsh-completion.py + +completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in + mkdir -p completions/fish + $(PYTHON) devscripts/fish-completion.py + +_EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py' +_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD) +_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD)) +yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) + $(PYTHON) devscripts/make_lazy_extractors.py $@ + +yt-dlp.tar.gz: all + @$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 
--group 0 \ + --exclude '*.DS_Store' \ + --exclude '*.kate-swp' \ + --exclude '*.pyc' \ + --exclude '*.pyo' \ + --exclude '*~' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.git' \ + -- \ + README.md supportedsites.md Changelog.md LICENSE \ + CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \ + Makefile yt-dlp.1 README.txt completions .gitignore \ + setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test + +AUTHORS: + git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS diff --git a/README.md b/README.md new file mode 100644 index 0000000..1e108a2 --- /dev/null +++ b/README.md @@ -0,0 +1,2317 @@ +<!-- MANPAGE: BEGIN EXCLUDED SECTION --> +<div align="center"> + +[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) + +[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation") +[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") +[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") +[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") +[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") +[![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") +[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") +[![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") +[![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") +[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/pulse/monthly "Last activity") + +</div> +<!-- MANPAGE: END EXCLUDED SECTION --> + +yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). 
The main focus of this project is adding new features and patches while also keeping up to date with the original project + +<!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE --> + +<!-- MANPAGE: BEGIN EXCLUDED SECTION --> +* [INSTALLATION](#installation) + * [Detailed instructions](https://github.com/yt-dlp/yt-dlp/wiki/Installation) + * [Release Files](#release-files) + * [Update](#update) + * [Dependencies](#dependencies) + * [Compile](#compile) +* [USAGE AND OPTIONS](#usage-and-options) + * [General Options](#general-options) + * [Network Options](#network-options) + * [Geo-restriction](#geo-restriction) + * [Video Selection](#video-selection) + * [Download Options](#download-options) + * [Filesystem Options](#filesystem-options) + * [Thumbnail Options](#thumbnail-options) + * [Internet Shortcut Options](#internet-shortcut-options) + * [Verbosity and Simulation Options](#verbosity-and-simulation-options) + * [Workarounds](#workarounds) + * [Video Format Options](#video-format-options) + * [Subtitle Options](#subtitle-options) + * [Authentication Options](#authentication-options) + * [Post-processing Options](#post-processing-options) + * [SponsorBlock Options](#sponsorblock-options) + * [Extractor Options](#extractor-options) +* [CONFIGURATION](#configuration) + * [Configuration file encoding](#configuration-file-encoding) + * [Authentication with netrc](#authentication-with-netrc) + * [Notes about environment variables](#notes-about-environment-variables) +* [OUTPUT TEMPLATE](#output-template) + * [Output template examples](#output-template-examples) +* [FORMAT SELECTION](#format-selection) + * [Filtering Formats](#filtering-formats) + * [Sorting Formats](#sorting-formats) + * [Format Selection examples](#format-selection-examples) +* [MODIFYING METADATA](#modifying-metadata) + * [Modifying metadata examples](#modifying-metadata-examples) +* [EXTRACTOR ARGUMENTS](#extractor-arguments) +* [PLUGINS](#plugins) + * [Installing Plugins](#installing-plugins) + * [Developing Plugins](#developing-plugins) +* [EMBEDDING YT-DLP](#embedding-yt-dlp) + * [Embedding examples](#embedding-examples) +* [CHANGES FROM YOUTUBE-DL](#changes-from-youtube-dl) + * [New features](#new-features) + * [Differences in default behavior](#differences-in-default-behavior) + * [Deprecated options](#deprecated-options) +* [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp) + * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) + * [Developer Instructions](CONTRIBUTING.md#developer-instructions) +* [WIKI](https://github.com/yt-dlp/yt-dlp/wiki) + * [FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ) +<!-- MANPAGE: END EXCLUDED SECTION --> + + +# INSTALLATION + +<!-- MANPAGE: BEGIN EXCLUDED SECTION --> +[![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe) +[![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) +[![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos) +[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) +[![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) +[![Other 
variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) +[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) +<!-- MANPAGE: END EXCLUDED SECTION --> + +You can install yt-dlp using [the binaries](#release-files), [pip](https://pypi.org/project/yt-dlp) or a third-party package manager. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for detailed instructions + + +<!-- MANPAGE: BEGIN EXCLUDED SECTION --> +## RELEASE FILES + +#### Recommended + +File|Description +:---|:--- +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) +[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**) + +#### Alternatives + +File|Description +:---|:--- +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary +[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows)) +[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary +[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update) +[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary +[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) +[yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable + +#### Misc + +File|Description +:---|:--- +[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball +[SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums +[SHA2-512SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS.sig)|GPG signature file for SHA512 sums +[SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums +[SHA2-256SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS.sig)|GPG signature file for SHA256 sums + +The public key that can be used to verify the GPG signatures is [available here](https://github.com/yt-dlp/yt-dlp/blob/master/public.key) +Example usage: +``` +curl -L https://github.com/yt-dlp/yt-dlp/raw/master/public.key | gpg --import +gpg --verify SHA2-256SUMS.sig SHA2-256SUMS +gpg --verify SHA2-512SUMS.sig SHA2-512SUMS +``` +<!-- MANPAGE: END EXCLUDED SECTION --> +
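+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+Once the signature on the sums file has been verified, the downloaded release files themselves can be checked against it. A minimal sketch using GNU coreutils, assuming `SHA2-256SUMS` and the files to verify are in the current directory:
+```
+# Check only the release files actually present; other entries are skipped
+sha256sum --check --ignore-missing SHA2-256SUMS
+```
+<!-- MANPAGE: END EXCLUDED SECTION -->
+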
+**Note**: The manpages, shell completion (autocomplete) files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) + + +## UPDATE +You can use `yt-dlp -U` to update if you are using the [release binaries](#release-files) + +If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program + +For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation + +<a id="update-channels"></a> + +There are currently three release channels for binaries: `stable`, `nightly` and `master`. + +* `stable` is the default channel, and many of its changes have been tested by users of the `nightly` and `master` channels. +* The `nightly` channel has releases scheduled to build every day around midnight UTC, for a snapshot of the project's new patches and changes. This is the **recommended channel for regular users** of yt-dlp. The `nightly` releases are available from [yt-dlp/yt-dlp-nightly-builds](https://github.com/yt-dlp/yt-dlp-nightly-builds/releases) or as development releases of the `yt-dlp` PyPI package (which can be installed with pip's `--pre` flag). +* The `master` channel features releases that are built after each push to the master branch, and these will have the very latest fixes and additions, but may also be more prone to regressions. They are available from [yt-dlp/yt-dlp-master-builds](https://github.com/yt-dlp/yt-dlp-master-builds/releases). + +When using `--update`/`-U`, a release binary will only update to its current channel. +`--update-to CHANNEL` can be used to switch to a different channel when a newer version is available. `--update-to [CHANNEL@]TAG` can also be used to upgrade or downgrade to specific tags from a channel. + +You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to, though; there is no verification done for binaries from different repositories. + +Example usage: +* `yt-dlp --update-to master` switch to the `master` channel and update to its latest release +* `yt-dlp --update-to stable@2023.07.06` upgrade/downgrade to the `stable` channel tag `2023.07.06` +* `yt-dlp --update-to 2023.10.07` upgrade/downgrade to tag `2023.10.07` if it exists on the current channel +* `yt-dlp --update-to example/yt-dlp@2023.09.24` upgrade/downgrade to the release from the `example/yt-dlp` repository, tag `2023.09.24` + +**Important**: Any user experiencing an issue with the `stable` release should install or update to the `nightly` release before submitting a bug report: +``` +# To update to nightly from stable executable/binary: +yt-dlp --update-to nightly + +# To install nightly with pip: +python3 -m pip install -U --pre yt-dlp[default] +``` + +## DEPENDENCIES +Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. + +<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created +<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x> +On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp.
You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually. +--> + +While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended + +### Strongly recommended + +* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) + + There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds + + **Important**: What you need is the ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg) + +### Networking +* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) +* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup> +* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE) +* [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE) + +### Metadata + +* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) +* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For `--embed-thumbnail` in `mp4`/`m4a` files when `mutagen`/`ffmpeg` cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) +* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattr`) on **Mac** and **BSD**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively + +### Misc + +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where JavaScript needs to be run.
Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) +* Any external downloader that you want to use with `--downloader` + +### Deprecated + +* [**avconv** and **avprobe**](https://www.libav.org) - Now **deprecated** alternative to ffmpeg. License [depends on the build](https://libav.org/legal) +* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) +* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) +* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp`/`mms` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) + +To use or redistribute the dependencies, you must agree to their respective licensing terms. + +The standalone release binaries are built with the Python interpreter and the packages marked with **\*** included. + +If you do not have the necessary dependencies for a task you are attempting, yt-dlp will warn you. All the currently available dependencies are visible at the top of the `--verbose` output + + +## COMPILE + +### Standalone PyInstaller Builds +To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same CPU architecture as the Python used. + +You can run the following commands: + +``` +python3 devscripts/install_deps.py --include pyinstaller +python3 devscripts/make_lazy_extractors.py +python3 -m bundle.pyinstaller +``` + +On some systems, you may need to use `py` or `python` instead of `python3`. + +`python -m bundle.pyinstaller` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which are further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). + +**Note**: PyInstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. + +**Important**: Running `pyinstaller` directly **instead of** using `python -m bundle.pyinstaller` is **not** officially supported. This may or may not work correctly. + +### Platform-independent Binary (UNIX) +You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*. + +After installing these, simply run `make`. + +You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this) + +### Standalone Py2Exe Builds (Windows) + +While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
+ +If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: + +``` +py devscripts/install_deps.py --include py2exe +py devscripts/make_lazy_extractors.py +py -m bundle.py2exe +``` + +### Related scripts + +* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. +* **`devscripts/update-version.py`** - Update the version number based on current date. +* **`devscripts/set-variant.py`** - Set the build variant of the executable. +* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. +* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. + +Note: See their `--help` for more info. + +### Forking the project +If you fork the project on GitHub, you can run your fork's [build workflow](.github/workflows/build.yml) to automatically build the selected version(s) as artifacts. Alternatively, you can run the [release workflow](.github/workflows/release.yml) or enable the [nightly workflow](.github/workflows/release-nightly.yml) to create full (pre-)releases. + +# USAGE AND OPTIONS + +<!-- MANPAGE: BEGIN EXCLUDED SECTION --> + yt-dlp [OPTIONS] [--] URL [URL...] + +`Ctrl+F` is your friend :D +<!-- MANPAGE: END EXCLUDED SECTION --> + +<!-- Auto generated --> +## General Options: + -h, --help Print this help text and exit + --version Print program version and exit + -U, --update Update this program to the latest version + --no-update Do not check for updates (default) + --update-to [CHANNEL]@[TAG] Upgrade/downgrade to a specific version. + CHANNEL can be a repository as well. CHANNEL + and TAG default to "stable" and "latest" + respectively if omitted; See "UPDATE" for + details. Supported channels: stable, + nightly, master + -i, --ignore-errors Ignore download and postprocessing errors. + The download will be considered successful + even if the postprocessing fails + --no-abort-on-error Continue with next video on download errors; + e.g. to skip unavailable videos in a + playlist (default) + --abort-on-error Abort downloading of further videos if an + error occurs (Alias: --no-ignore-errors) + --dump-user-agent Display the current user-agent and exit + --list-extractors List all supported extractors and exit + --extractor-descriptions Output descriptions of all supported + extractors and exit + --use-extractors NAMES Extractor names to use separated by commas. + You can also use regexes, "all", "default" + and "end" (end URL matching); e.g. --ies + "holodex.*,end,youtube". Prefix the name + with a "-" to exclude it, e.g. --ies + default,-generic. Use --list-extractors for + a list of extractor names. (Alias: --ies) + --default-search PREFIX Use this prefix for unqualified URLs. E.g. + "gvsearch2:python" downloads two videos from + google videos for the search term "python". + Use the value "auto" to let yt-dlp guess + ("auto_warning" to emit a warning when + guessing). "error" just throws an error. The + default value "fixup_error" repairs broken + URLs, but emits an error if this is not + possible instead of searching + --ignore-config Don't load any more configuration files + except those given to --config-locations. 
+ For backward compatibility, if this option + is found inside the system configuration + file, the user configuration is not loaded. + (Alias: --no-config) + --no-config-locations Do not load any custom configuration files + (default). When given inside a configuration + file, ignore all previous --config-locations + defined in the current file + --config-locations PATH Location of the main configuration file; + either the path to the config or its + containing directory ("-" for stdin). Can be + used multiple times and inside other + configuration files + --flat-playlist Do not extract the videos of a playlist, + only list them + --no-flat-playlist Fully extract the videos of a playlist + (default) + --live-from-start Download livestreams from the start. + Currently only supported for YouTube + (Experimental) + --no-live-from-start Download livestreams from the current time + (default) + --wait-for-video MIN[-MAX] Wait for scheduled streams to become + available. Pass the minimum number of + seconds (or range) to wait between retries + --no-wait-for-video Do not wait for scheduled streams (default) + --mark-watched Mark videos watched (even with --simulate) + --no-mark-watched Do not mark videos watched (default) + --color [STREAM:]POLICY Whether to emit color codes in output, + optionally prefixed by the STREAM (stdout or + stderr) to apply the setting to. Can be one + of "always", "auto" (default), "never", or + "no_color" (use non color terminal + sequences). Can be used multiple times + --compat-options OPTS Options that can help keep compatibility + with youtube-dl or youtube-dlc + configurations by reverting some of the + changes made in yt-dlp. See "Differences in + default behavior" for details + --alias ALIASES OPTIONS Create aliases for an option string. Unless + an alias starts with a dash "-", it is + prefixed with "--". Arguments are parsed + according to the Python string formatting + mini-language. E.g. --alias get-audio,-X + "-S=aext:{0},abr -x --audio-format {0}" + creates options "--get-audio" and "-X" that + takes an argument (ARG0) and expands to + "-S=aext:ARG0,abr -x --audio-format ARG0". + All defined aliases are listed in the --help + output. Alias options can trigger more + aliases; so be careful to avoid defining + recursive options. As a safety measure, each + alias may be triggered a maximum of 100 + times. This option can be used multiple times + +## Network Options: + --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To + enable SOCKS proxy, specify a proper scheme, + e.g. socks5://user:pass@127.0.0.1:1080/. + Pass in an empty string (--proxy "") for + direct connection + --socket-timeout SECONDS Time to wait before giving up, in seconds + --source-address IP Client-side IP address to bind to + -4, --force-ipv4 Make all connections via IPv4 + -6, --force-ipv6 Make all connections via IPv6 + --enable-file-urls Enable file:// URLs. This is disabled by + default for security reasons. + +## Geo-restriction: + --geo-verification-proxy URL Use this proxy to verify the IP address for + some geo-restricted sites. The default proxy + specified by --proxy (or none, if the option + is not present) is used for the actual + downloading + --xff VALUE How to fake X-Forwarded-For HTTP header to + try bypassing geographic restriction. 
One of + "default" (only when known to be useful), + "never", an IP block in CIDR notation, or a + two-letter ISO 3166-1 country code + +## Video Selection: + -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items + to download. You can specify a range using + "[START]:[STOP][:STEP]". For backward + compatibility, START-STOP is also supported. + Use negative indices to count from the right + and negative STEP to download in reverse + order. E.g. "-I 1:3,7,-5::2" used on a + playlist of size 15 will download the items + at index 1,2,3,7,11,13,15 + --min-filesize SIZE Abort download if filesize is smaller than + SIZE, e.g. 50k or 44.6M + --max-filesize SIZE Abort download if filesize is larger than + SIZE, e.g. 50k or 44.6M + --date DATE Download only videos uploaded on this date. + The date can be "YYYYMMDD" or in the format + [now|today|yesterday][-N[day|week|month|year]]. + E.g. "--date today-2weeks" downloads only + videos uploaded on the same day two weeks ago + --datebefore DATE Download only videos uploaded on or before + this date. The date formats accepted are the + same as --date + --dateafter DATE Download only videos uploaded on or after + this date. The date formats accepted are the + same as --date + --match-filters FILTER Generic video filter. Any "OUTPUT TEMPLATE" + field can be compared with a number or a + string using the operators defined in + "Filtering Formats". You can also simply + specify a field to match if the field is + present, use "!field" to check if the field + is not present, and "&" to check multiple + conditions. Use a "\" to escape "&" or + quotes if needed. If used multiple times, + the filter matches if at least one of the + conditions is met. E.g. --match-filter + !is_live --match-filter "like_count>?100 & + description~='(?i)\bcats \& dogs\b'" matches + only videos that are not live OR those that + have a like count of more than 100 (or the like + field is not available) and also have a + description that contains the phrase "cats & + dogs" (caseless). Use "--match-filter -" to + interactively ask whether to download each + video + --no-match-filters Do not use any --match-filter (default) + --break-match-filters FILTER Same as "--match-filters" but stops the + download process when a video is rejected + --no-break-match-filters Do not use any --break-match-filters (default) + --no-playlist Download only the video, if the URL refers + to a video and a playlist + --yes-playlist Download the playlist, if the URL refers to + a video and a playlist + --age-limit YEARS Download only videos suitable for the given + age + --download-archive FILE Download only videos not listed in the + archive file. Record the IDs of all + downloaded videos in it + --no-download-archive Do not use archive file (default) + --max-downloads NUMBER Abort after downloading NUMBER files + --break-on-existing Stop the download process when encountering + a file that is in the archive + --break-per-input Alters --max-downloads, --break-on-existing, + --break-match-filter, and autonumber to + reset per input URL + --no-break-per-input --break-on-existing and similar options + terminate the entire download queue + --skip-playlist-after-errors N Number of allowed failures until the rest of + the playlist is skipped + +## Download Options: + -N, --concurrent-fragments N Number of fragments of a dash/hlsnative + video that should be downloaded concurrently + (default is 1) + -r, --limit-rate RATE Maximum download rate in bytes per second, + e.g.
50K or 4.2M + --throttled-rate RATE Minimum download rate in bytes per second + below which throttling is assumed and the + video data is re-extracted, e.g. 100K + -R, --retries RETRIES Number of retries (default is 10), or + "infinite" + --file-access-retries RETRIES Number of times to retry on file access + error (default is 3), or "infinite" + --fragment-retries RETRIES Number of retries for a fragment (default is + 10), or "infinite" (DASH, hlsnative and ISM) + --retry-sleep [TYPE:]EXPR Time to sleep between retries in seconds + (optionally) prefixed by the type of retry + (http (default), fragment, file_access, + extractor) to apply the sleep to. EXPR can + be a number, linear=START[:END[:STEP=1]] or + exp=START[:END[:BASE=2]]. This option can be + used multiple times to set the sleep for the + different retry types, e.g. --retry-sleep + linear=1::2 --retry-sleep fragment:exp=1:20 + --skip-unavailable-fragments Skip unavailable fragments for DASH, + hlsnative and ISM downloads (default) + (Alias: --no-abort-on-unavailable-fragments) + --abort-on-unavailable-fragments + Abort download if a fragment is unavailable + (Alias: --no-skip-unavailable-fragments) + --keep-fragments Keep downloaded fragments on disk after + downloading is finished + --no-keep-fragments Delete downloaded fragments after + downloading is finished (default) + --buffer-size SIZE Size of download buffer, e.g. 1024 or 16K + (default is 1024) + --resize-buffer The buffer size is automatically resized + from an initial value of --buffer-size + (default) + --no-resize-buffer Do not automatically adjust the buffer size + --http-chunk-size SIZE Size of a chunk for chunk-based HTTP + downloading, e.g. 10485760 or 10M (default + is disabled). May be useful for bypassing + bandwidth throttling imposed by a webserver + (experimental) + --playlist-random Download playlist videos in random order + --lazy-playlist Process entries in the playlist as they are + received. This disables n_entries, + --playlist-random and --playlist-reverse + --no-lazy-playlist Process videos in the playlist only after + the entire playlist is parsed (default) + --xattr-set-filesize Set file xattribute ytdl.filesize with + expected file size + --hls-use-mpegts Use the mpegts container for HLS videos; + allowing some players to play the video + while downloading, and reducing the chance + of file corruption if download is + interrupted. This is enabled by default for + live streams + --no-hls-use-mpegts Do not use the mpegts container for HLS + videos. This is default when not downloading + live streams + --download-sections REGEX Download only chapters that match the + regular expression. A "*" prefix denotes + time-range instead of chapter. Negative + timestamps are calculated from the end. + "*from-url" can be used to download between + the "start_time" and "end_time" extracted + from the URL. Needs ffmpeg. This option can + be used multiple times to download multiple + sections, e.g. --download-sections + "*10:15-inf" --download-sections "intro" + --downloader [PROTO:]NAME Name or path of the external downloader to + use (optionally) prefixed by the protocols + (http, ftp, m3u8, dash, rtsp, rtmp, mms) to + use it for. Currently supports native, + aria2c, avconv, axel, curl, ffmpeg, httpie, + wget. You can use this option multiple times + to set different downloaders for different + protocols. E.g.
--downloader aria2c + --downloader "dash,m3u8:native" will use + aria2c for http/ftp downloads, and the + native downloader for dash/m3u8 downloads + (Alias: --external-downloader) + --downloader-args NAME:ARGS Give these arguments to the external + downloader. Specify the downloader name and + the arguments separated by a colon ":". For + ffmpeg, arguments can be passed to different + positions using the same syntax as + --postprocessor-args. You can use this + option multiple times to give different + arguments to different downloaders (Alias: + --external-downloader-args) + +## Filesystem Options: + -a, --batch-file FILE File containing URLs to download ("-" for + stdin), one URL per line. Lines starting + with "#", ";" or "]" are considered as + comments and ignored + --no-batch-file Do not read URLs from batch file (default) + -P, --paths [TYPES:]PATH The paths where the files should be + downloaded. Specify the type of file and the + path separated by a colon ":". All the same + TYPES as --output are supported. + Additionally, you can also provide "home" + (default) and "temp" paths. All intermediary + files are first downloaded to the temp path + and then the final files are moved over to + the home path after download is finished. + This option is ignored if --output is an + absolute path + -o, --output [TYPES:]TEMPLATE Output filename template; see "OUTPUT + TEMPLATE" for details + --output-na-placeholder TEXT Placeholder for unavailable fields in + --output (default: "NA") + --restrict-filenames Restrict filenames to only ASCII characters, + and avoid "&" and spaces in filenames + --no-restrict-filenames Allow Unicode characters, "&" and spaces in + filenames (default) + --windows-filenames Force filenames to be Windows-compatible + --no-windows-filenames Make filenames Windows-compatible only if + using Windows (default) + --trim-filenames LENGTH Limit the filename length (excluding + extension) to the specified number of + characters + -w, --no-overwrites Do not overwrite any files + --force-overwrites Overwrite all video and metadata files. This + option includes --no-continue + --no-force-overwrites Do not overwrite the video, but overwrite + related files (default) + -c, --continue Resume partially downloaded files/fragments + (default) + --no-continue Do not resume partially downloaded + fragments. If the file is not fragmented, + restart download of the entire file + --part Use .part files instead of writing directly + into output file (default) + --no-part Do not use .part files - write directly into + output file + --mtime Use the Last-modified header to set the file + modification time (default) + --no-mtime Do not use the Last-modified header to set + the file modification time + --write-description Write video description to a .description file + --no-write-description Do not write video description (default) + --write-info-json Write video metadata to a .info.json file + (this may contain personal information) + --no-write-info-json Do not write video metadata (default) + --write-playlist-metafiles Write playlist metadata in addition to the + video metadata when using --write-info-json, + --write-description etc. (default) + --no-write-playlist-metafiles Do not write playlist metadata when using + --write-info-json, --write-description etc. + --clean-info-json Remove some internal metadata such as + filenames from the infojson (default) + --no-clean-info-json Write all fields to the infojson + --write-comments Retrieve video comments to be placed in the + infojson. 
The comments are fetched even + without this option if the extraction is + known to be quick (Alias: --get-comments) + --no-write-comments Do not retrieve video comments unless the + extraction is known to be quick (Alias: + --no-get-comments) + --load-info-json FILE JSON file containing the video information + (created with the "--write-info-json" option) + --cookies FILE Netscape formatted file to read cookies from + and dump cookie jar in + --no-cookies Do not read/dump cookies from/to file + (default) + --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER] + The name of the browser to load cookies + from. Currently supported browsers are: + brave, chrome, chromium, edge, firefox, + opera, safari, vivaldi. Optionally, the + KEYRING used for decrypting Chromium cookies + on Linux, the name/path of the PROFILE to + load cookies from, and the CONTAINER name + (if Firefox) ("none" for no container) can + be given with their respective separators. + By default, all containers of the most + recently accessed profile are used. + Currently supported keyrings are: basictext, + gnomekeyring, kwallet, kwallet5, kwallet6 + --no-cookies-from-browser Do not load cookies from browser (default) + --cache-dir DIR Location in the filesystem where yt-dlp can + store some downloaded information (such as + client ids and signatures) permanently. By + default ${XDG_CACHE_HOME}/yt-dlp + --no-cache-dir Disable filesystem caching + --rm-cache-dir Delete all filesystem cache files + +## Thumbnail Options: + --write-thumbnail Write thumbnail image to disk + --no-write-thumbnail Do not write thumbnail image to disk (default) + --write-all-thumbnails Write all thumbnail image formats to disk + --list-thumbnails List available thumbnails of each video. + Simulate unless --no-simulate is used + +## Internet Shortcut Options: + --write-link Write an internet shortcut file, depending + on the current platform (.url, .webloc or + .desktop). The URL may be cached by the OS + --write-url-link Write a .url Windows internet shortcut. The + OS caches the URL based on the file path + --write-webloc-link Write a .webloc macOS internet shortcut + --write-desktop-link Write a .desktop Linux internet shortcut + +## Verbosity and Simulation Options: + -q, --quiet Activate quiet mode. If used with --verbose, + print the log to stderr + --no-quiet Deactivate quiet mode. (Default) + --no-warnings Ignore warnings + -s, --simulate Do not download the video and do not write + anything to disk + --no-simulate Download the video even if printing/listing + options are used + --ignore-no-formats-error Ignore "No video formats" error. Useful for + extracting metadata even if the videos are + not actually available for download + (experimental) + --no-ignore-no-formats-error Throw error when no downloadable video + formats are found (default) + --skip-download Do not download the video but write all + related files (Alias: --no-download) + -O, --print [WHEN:]TEMPLATE Field name or output template to print to + screen, optionally prefixed with when to + print it, separated by a ":". Supported + values of "WHEN" are the same as that of + --use-postprocessor (default: video). + Implies --quiet. Implies --simulate unless + --no-simulate or later stages of WHEN are + used. This option can be used multiple times + --print-to-file [WHEN:]TEMPLATE FILE + Append given template to the file. The + values of WHEN and TEMPLATE are the same as that + of --print. FILE uses the same syntax as the + output template.
This option can be used + multiple times + -j, --dump-json Quiet, but print JSON information for each + video. Simulate unless --no-simulate is + used. See "OUTPUT TEMPLATE" for a + description of available keys + -J, --dump-single-json Quiet, but print JSON information for each + url or infojson passed. Simulate unless + --no-simulate is used. If the URL refers to + a playlist, the whole playlist information + is dumped in a single line + --force-write-archive Force download archive entries to be written + as far as no errors occur, even if -s or + another simulation option is used (Alias: + --force-download-archive) + --newline Output progress bar as new lines + --no-progress Do not print progress bar + --progress Show progress bar, even if in quiet mode + --console-title Display progress in console titlebar + --progress-template [TYPES:]TEMPLATE + Template for progress outputs, optionally + prefixed with one of "download:" (default), + "download-title:" (the console title), + "postprocess:", or "postprocess-title:". + The video's fields are accessible under the + "info" key and the progress attributes are + accessible under "progress" key. E.g. + --console-title --progress-template + "download-title:%(info.id)s-%(progress.eta)s" + -v, --verbose Print various debugging information + --dump-pages Print downloaded pages encoded using base64 + to debug problems (very verbose) + --write-pages Write downloaded intermediary pages to files + in the current directory to debug problems + --print-traffic Display sent and read HTTP traffic + +## Workarounds: + --encoding ENCODING Force the specified encoding (experimental) + --legacy-server-connect Explicitly allow HTTPS connection to servers + that do not support RFC 5746 secure + renegotiation + --no-check-certificates Suppress HTTPS certificate validation + --prefer-insecure Use an unencrypted connection to retrieve + information about the video (Currently + supported only for YouTube) + --add-headers FIELD:VALUE Specify a custom HTTP header and its value, + separated by a colon ":". You can use this + option multiple times + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH + --sleep-requests SECONDS Number of seconds to sleep between requests + during data extraction + --sleep-interval SECONDS Number of seconds to sleep before each + download. This is the minimum time to sleep + when used along with --max-sleep-interval + (Alias: --min-sleep-interval) + --max-sleep-interval SECONDS Maximum number of seconds to sleep. 
Can only + be used along with --min-sleep-interval + --sleep-subtitles SECONDS Number of seconds to sleep before each + subtitle download + +## Video Format Options: + -f, --format FORMAT Video format code, see "FORMAT SELECTION" + for more details + -S, --format-sort SORTORDER Sort the formats by the fields given, see + "Sorting Formats" for more details + --format-sort-force Force user specified sort order to have + precedence over all fields, see "Sorting + Formats" for more details (Alias: --S-force) + --no-format-sort-force Some fields have precedence over the user + specified sort order (default) + --video-multistreams Allow multiple video streams to be merged + into a single file + --no-video-multistreams Only one video stream is downloaded for each + output file (default) + --audio-multistreams Allow multiple audio streams to be merged + into a single file + --no-audio-multistreams Only one audio stream is downloaded for each + output file (default) + --prefer-free-formats Prefer video formats with free containers + over non-free ones of same quality. Use with + "-S ext" to strictly prefer free containers + irrespective of quality + --no-prefer-free-formats Don't give any special preference to free + containers (default) + --check-formats Make sure formats are selected only from + those that are actually downloadable + --check-all-formats Check all formats for whether they are + actually downloadable + --no-check-formats Do not check that the formats are actually + downloadable + -F, --list-formats List available formats of each video. + Simulate unless --no-simulate is used + --merge-output-format FORMAT Containers that may be used when merging + formats, separated by "/", e.g. "mp4/mkv". + Ignored if no merge is required. (currently + supported: avi, flv, mkv, mov, mp4, webm) + +## Subtitle Options: + --write-subs Write subtitle file + --no-write-subs Do not write subtitle file (default) + --write-auto-subs Write automatically generated subtitle file + (Alias: --write-automatic-subs) + --no-write-auto-subs Do not write auto-generated subtitles + (default) (Alias: --no-write-automatic-subs) + --list-subs List available subtitles of each video. + Simulate unless --no-simulate is used + --sub-format FORMAT Subtitle format; accepts formats preference, + e.g. "srt" or "ass/srt/best" + --sub-langs LANGS Languages of the subtitles to download (can + be regex) or "all" separated by commas, e.g. + --sub-langs "en.*,ja". You can prefix the + language code with a "-" to exclude it from + the requested languages, e.g. --sub-langs + all,-live_chat. Use --list-subs for a list + of available language tags + +## Authentication Options: + -u, --username USERNAME Login with this account ID + -p, --password PASSWORD Account password. If this option is left + out, yt-dlp will ask interactively + -2, --twofactor TWOFACTOR Two-factor authentication code + -n, --netrc Use .netrc authentication data + --netrc-location PATH Location of .netrc authentication data; + either the path or its containing directory. + Defaults to ~/.netrc + --netrc-cmd NETRC_CMD Command to execute to get the credentials + for an extractor. + --video-password PASSWORD Video-specific password + --ap-mso MSO Adobe Pass multiple-system operator (TV + provider) identifier, use --ap-list-mso for + a list of available MSOs + --ap-username USERNAME Multiple-system operator account login + --ap-password PASSWORD Multiple-system operator account password. 
+ If this option is left out, yt-dlp will ask + interactively + --ap-list-mso List all supported multiple-system operators + --client-certificate CERTFILE Path to client certificate file in PEM + format. May include the private key + --client-certificate-key KEYFILE + Path to private key file for client + certificate + --client-certificate-password PASSWORD + Password for client certificate private key, + if encrypted. If not provided, and the key + is encrypted, yt-dlp will ask interactively + +## Post-Processing Options: + -x, --extract-audio Convert video files to audio-only files + (requires ffmpeg and ffprobe) + --audio-format FORMAT Format to convert the audio to when -x is + used. (currently supported: best (default), + aac, alac, flac, m4a, mp3, opus, vorbis, + wav). You can specify multiple rules using + similar syntax as --remux-video + --audio-quality QUALITY Specify ffmpeg audio quality to use when + converting the audio with -x. Insert a value + between 0 (best) and 10 (worst) for VBR or a + specific bitrate like 128K (default 5) + --remux-video FORMAT Remux the video into another container if + necessary (currently supported: avi, flv, + gif, mkv, mov, mp4, webm, aac, aiff, alac, + flac, m4a, mka, mp3, ogg, opus, vorbis, + wav). If target container does not support + the video/audio codec, remuxing will fail. + You can specify multiple rules; e.g. + "aac>m4a/mov>mp4/mkv" will remux aac to m4a, + mov to mp4 and anything else to mkv + --recode-video FORMAT Re-encode the video into another format if + necessary. The syntax and supported formats + are the same as --remux-video + --postprocessor-args NAME:ARGS Give these arguments to the postprocessors. + Specify the postprocessor/executable name + and the arguments separated by a colon ":" + to give the argument to the specified + postprocessor/executable. Supported PP are: + Merger, ModifyChapters, SplitChapters, + ExtractAudio, VideoRemuxer, VideoConvertor, + Metadata, EmbedSubtitle, EmbedThumbnail, + SubtitlesConvertor, ThumbnailsConvertor, + FixupStretched, FixupM4a, FixupM3u8, + FixupTimestamp and FixupDuration. The + supported executables are: AtomicParsley, + FFmpeg and FFprobe. You can also specify + "PP+EXE:ARGS" to give the arguments to the + specified executable only when being used by + the specified postprocessor. Additionally, + for ffmpeg/ffprobe, "_i"/"_o" can be + appended to the prefix optionally followed + by a number to pass the argument before the + specified input/output file, e.g. --ppa + "Merger+ffmpeg_i1:-v quiet". You can use + this option multiple times to give different + arguments to different postprocessors. + (Alias: --ppa) + -k, --keep-video Keep the intermediate video file on disk + after post-processing + --no-keep-video Delete the intermediate video file after + post-processing (default) + --post-overwrites Overwrite post-processed files (default) + --no-post-overwrites Do not overwrite post-processed files + --embed-subs Embed subtitles in the video (only for mp4, + webm and mkv videos) + --no-embed-subs Do not embed subtitles (default) + --embed-thumbnail Embed thumbnail in the video as cover art + --no-embed-thumbnail Do not embed thumbnail (default) + --embed-metadata Embed metadata to the video file. 
Also + embeds chapters/infojson if present unless + --no-embed-chapters/--no-embed-info-json are + used (Alias: --add-metadata) + --no-embed-metadata Do not add metadata to file (default) + (Alias: --no-add-metadata) + --embed-chapters Add chapter markers to the video file + (Alias: --add-chapters) + --no-embed-chapters Do not add chapter markers (default) (Alias: + --no-add-chapters) + --embed-info-json Embed the infojson as an attachment to + mkv/mka video files + --no-embed-info-json Do not embed the infojson as an attachment + to the video file + --parse-metadata [WHEN:]FROM:TO + Parse additional metadata like title/artist + from other fields; see "MODIFYING METADATA" + for details. Supported values of "WHEN" are + the same as that of --use-postprocessor + (default: pre_process) + --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE + Replace text in a metadata field using the + given regex. This option can be used + multiple times. Supported values of "WHEN" + are the same as that of --use-postprocessor + (default: pre_process) + --xattrs Write metadata to the video file's xattrs + (using dublin core and xdg standards) + --concat-playlist POLICY Concatenate videos in a playlist. One of + "never", "always", or "multi_video" + (default; only when the videos form a single + show). All the video files must have same + codecs and number of streams to be + concatable. The "pl_video:" prefix can be + used with "--paths" and "--output" to set + the output filename for the concatenated + files. See "OUTPUT TEMPLATE" for details + --fixup POLICY Automatically correct known faults of the + file. One of never (do nothing), warn (only + emit a warning), detect_or_warn (the + default; fix file if we can, warn + otherwise), force (try fixing even if file + already exists) + --ffmpeg-location PATH Location of the ffmpeg binary; either the + path to the binary or its containing directory + --exec [WHEN:]CMD Execute a command, optionally prefixed with + when to execute it, separated by a ":". + Supported values of "WHEN" are the same as + that of --use-postprocessor (default: + after_move). Same syntax as the output + template can be used to pass any field as + arguments to the command. If no fields are + passed, %(filepath,_filename|)q is appended + to the end of the command. This option can + be used multiple times + --no-exec Remove any previously defined --exec + --convert-subs FORMAT Convert the subtitles to another format + (currently supported: ass, lrc, srt, vtt) + (Alias: --convert-subtitles) + --convert-thumbnails FORMAT Convert the thumbnails to another format + (currently supported: jpg, png, webp). You + can specify multiple rules using similar + syntax as --remux-video + --split-chapters Split video into multiple files based on + internal chapters. The "chapter:" prefix can + be used with "--paths" and "--output" to set + the output filename for the split files. See + "OUTPUT TEMPLATE" for details + --no-split-chapters Do not split video based on chapters (default) + --remove-chapters REGEX Remove chapters whose title matches the + given regular expression. The syntax is the + same as --download-sections. This option can + be used multiple times + --no-remove-chapters Do not remove any chapters from the file + (default) + --force-keyframes-at-cuts Force keyframes at cuts when + downloading/splitting/removing sections. 
+ This is slow due to needing a re-encode, but + the resulting video may have fewer artifacts + around the cuts + --no-force-keyframes-at-cuts Do not force keyframes around the chapters + when cutting/splitting (default) + --use-postprocessor NAME[:ARGS] + The (case sensitive) name of plugin + postprocessors to be enabled, and + (optionally) arguments to be passed to it, + separated by a colon ":". ARGS are a + semicolon ";" delimited list of NAME=VALUE. + The "when" argument determines when the + postprocessor is invoked. It can be one of + "pre_process" (after video extraction), + "after_filter" (after video passes filter), + "video" (after --format; before + --print/--output), "before_dl" (before each + video download), "post_process" (after each + video download; default), "after_move" + (after moving video file to it's final + locations), "after_video" (after downloading + and processing all formats of a video), or + "playlist" (at end of playlist). This option + can be used multiple times to add different + postprocessors + +## SponsorBlock Options: +Make chapter entries for, or remove various segments (sponsor, + introductions, etc.) from downloaded YouTube videos using the + [SponsorBlock API](https://sponsor.ajay.app) + + --sponsorblock-mark CATS SponsorBlock categories to create chapters + for, separated by commas. Available + categories are sponsor, intro, outro, + selfpromo, preview, filler, interaction, + music_offtopic, poi_highlight, chapter, all + and default (=all). You can prefix the + category with a "-" to exclude it. See [1] + for description of the categories. E.g. + --sponsorblock-mark all,-preview + [1] https://wiki.sponsor.ajay.app/w/Segment_Categories + --sponsorblock-remove CATS SponsorBlock categories to be removed from + the video file, separated by commas. If a + category is present in both mark and remove, + remove takes precedence. The syntax and + available categories are the same as for + --sponsorblock-mark except that "default" + refers to "all,-filler" and poi_highlight, + chapter are not available + --sponsorblock-chapter-title TEMPLATE + An output template for the title of the + SponsorBlock chapters created by + --sponsorblock-mark. The only available + fields are start_time, end_time, category, + categories, name, category_names. Defaults + to "[SponsorBlock]: %(category_names)l" + --no-sponsorblock Disable both --sponsorblock-mark and + --sponsorblock-remove + --sponsorblock-api URL SponsorBlock API location, defaults to + https://sponsor.ajay.app + +## Extractor Options: + --extractor-retries RETRIES Number of retries for known extractor errors + (default is 3), or "infinite" + --allow-dynamic-mpd Process dynamic DASH manifests (default) + (Alias: --no-ignore-dynamic-mpd) + --ignore-dynamic-mpd Do not process dynamic DASH manifests + (Alias: --no-allow-dynamic-mpd) + --hls-split-discontinuity Split HLS playlists to different formats at + discontinuities such as ad breaks + --no-hls-split-discontinuity Do not split HLS playlists to different + formats at discontinuities such as ad breaks + (default) + --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor. + See "EXTRACTOR ARGUMENTS" for details. You + can use this option multiple times to give + arguments for different extractors + +# CONFIGURATION + +You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations: + +1. **Main Configuration**: + * The file given to `--config-location` +1. 
**Portable Configuration**: (Recommended for portable installations)
+    * If using a binary, `yt-dlp.conf` in the same directory as the binary
+    * If running from source-code, `yt-dlp.conf` in the parent directory of `yt_dlp`
+1. **Home Configuration**:
+    * `yt-dlp.conf` in the home path given to `-P`
+    * If `-P` is not given, the current directory is searched
+1. **User Configuration**:
+    * `${XDG_CONFIG_HOME}/yt-dlp.conf`
+    * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS)
+    * `${XDG_CONFIG_HOME}/yt-dlp/config.txt`
+    * `${APPDATA}/yt-dlp.conf`
+    * `${APPDATA}/yt-dlp/config` (recommended on Windows)
+    * `${APPDATA}/yt-dlp/config.txt`
+    * `~/yt-dlp.conf`
+    * `~/yt-dlp.conf.txt`
+    * `~/.yt-dlp/config`
+    * `~/.yt-dlp/config.txt`
+
+    See also: [Notes about environment variables](#notes-about-environment-variables)
+1. **System Configuration**:
+    * `/etc/yt-dlp.conf`
+    * `/etc/yt-dlp/config`
+    * `/etc/yt-dlp/config.txt`
+
+E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under a `YouTube` directory in your home directory:
+```
+# Lines starting with # are comments
+
+# Always extract audio
+-x
+
+# Do not copy the mtime
+--no-mtime
+
+# Use this proxy
+--proxy 127.0.0.1:3128
+
+# Save all videos under YouTube directory in your home directory
+-o ~/YouTube/%(title)s.%(ext)s
+```
+
+**Note**: Options in a configuration file are the same options (switches) used in regular command-line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.
+
+You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
+
+### Configuration file encoding
+
+The configuration files are decoded according to the UTF BOM if present, and in the encoding from the system locale otherwise.
+
+If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, not even spaces or a BOM.
+
+### Authentication with netrc
+
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`), so that you do not need to pass credentials as command-line arguments on every yt-dlp execution, and so that plain-text passwords are not tracked in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
+```
+touch ${HOME}/.netrc
+chmod a-rwx,u+rw ${HOME}/.netrc
+```
+After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
+```
+machine <extractor> login <username> password <password>
+```
+E.g.
+```
+machine youtube login myaccount@gmail.com password my_youtube_password
+machine twitch login my_twitch_account_name password my_twitch_password
+```
+To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration).
+
+The default location of the `.netrc` file is `~` (see below).
+
+As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain-text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter; it must output the credentials in the netrc format and return `0` on success, while other return values are treated as an error. `{}` in the command will be replaced by the name of the extractor, making it possible to select the credentials for the right extractor.
+
+E.g. to use an encrypted `.netrc` file stored as `.authinfo.gpg`
+```
+yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc
+```
+
+
+### Notes about environment variables
+* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows, but are always shown as `${VARIABLE}` in this documentation
+* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location`
+* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache`
+* On Windows, `~` points to `${HOME}` if present; otherwise to `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}`
+* On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming`
+
+# OUTPUT TEMPLATE
+
+The `-o` option is used to indicate a template for the output file names while the `-P` option is used to specify the path each type of file should be saved to.
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+**tl;dr:** [navigate me to examples](#output-template-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding the file extension like this is _not_ recommended and could break some post-processing).
+
+The template may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), e.g. `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
+
+The field names themselves (the part inside the parentheses) can also have some special formatting:
+
+1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; e.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that not all of the fields that become available using this method are listed below. Use `-j` to see such fields
+
+1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g.
`%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` + +1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` + +1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s` + +1. **Replacement**: A replacement value can be specified using a `&` separator according to the [`str.format` mini-language](https://docs.python.org/3/library/string.html#format-specification-mini-language). If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. E.g. `%(chapters&has chapters|no chapters)s`, `%(title&TITLE={:>20}|NO TITLE)s` + +1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s` + +1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted) + +1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC + +To summarize, the general syntax for a field is: +``` +%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type +``` + +Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. + +<a id="outtmpl-postprocess-note"></a> + +**Note**: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete. 
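+
+As a quick illustration of the field syntax described above (a sketch only; the URL is a placeholder and the printed values depend on the video's metadata):
+
+```bash
+$ yt-dlp --print "%(tags.0|no tags)s" \
+         --print "%(upload_date>%Y-%m-%d)s" \
+         --print "%(view_count)D views" \
+         --print "%(formats.:.format_id)#l" \
+         "https://some/video"
+```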
+
+The available fields are:
+
+ - `id` (string): Video identifier
+ - `title` (string): Video title
+ - `fulltitle` (string): Video title ignoring live timestamp and generic title
+ - `ext` (string): Video filename extension
+ - `alt_title` (string): A secondary title of the video
+ - `description` (string): The description of the video
+ - `display_id` (string): An alternative identifier for the video
+ - `uploader` (string): Full name of the video uploader
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `uploader_url` (string): URL to the video uploader's profile
+ - `license` (string): License name the video is licensed under
+ - `creators` (list): The creators of the video
+ - `creator` (string): The creators of the video; comma-separated
+ - `timestamp` (numeric): UNIX timestamp of the moment the video became available
+ - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
+ - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
+ - `release_date` (string): The date (YYYYMMDD) when the video was released in UTC
+ - `release_year` (numeric): Year (YYYY) when the video or album was released
+ - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
+ - `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
+ - `channel` (string): Full name of the channel the video is uploaded on
+ - `channel_id` (string): Id of the channel
+ - `channel_url` (string): URL of the channel
+ - `channel_follower_count` (numeric): Number of followers of the channel
+ - `channel_is_verified` (boolean): Whether the channel is verified on the platform
+ - `location` (string): Physical location where the video was filmed
+ - `duration` (numeric): Length of the video in seconds
+ - `duration_string` (string): Length of the video (HH:mm:ss)
+ - `view_count` (numeric): How many users have watched the video on the platform
+ - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform
+ - `like_count` (numeric): Number of positive ratings of the video
+ - `dislike_count` (numeric): Number of negative ratings of the video
+ - `repost_count` (numeric): Number of reposts of the video
+ - `average_rating` (numeric): Average rating given by users; the scale used depends on the webpage
+ - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
+ - `age_limit` (numeric): Age restriction for the video (years)
+ - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
+ - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
+ - `was_live` (boolean): Whether this video was originally a live stream
+ - `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
+ - `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
+ - `media_type` (string): The type of media as classified by the site, e.g.
"episode", "clip", "trailer" + - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL + - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL + - `extractor` (string): Name of the extractor + - `extractor_key` (string): Key name of the extractor + - `epoch` (numeric): Unix epoch of when the information extraction was completed + - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`, padded with leading zeros to 5 digits + - `video_autonumber` (numeric): Number that will be increased with each video + - `n_entries` (numeric): Total number of extracted items in the playlist + - `playlist_id` (string): Identifier of the playlist that contains the video + - `playlist_title` (string): Name of the playlist that contains the video + - `playlist` (string): `playlist_id` or `playlist_title` + - `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted + - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according the final index + - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist + - `playlist_uploader` (string): Full name of the playlist uploader + - `playlist_uploader_id` (string): Nickname or id of the playlist uploader + - `webpage_url` (string): A URL to the video webpage which if given to yt-dlp should allow to get the same result again + - `webpage_url_basename` (string): The basename of the webpage URL + - `webpage_url_domain` (string): The domain of the webpage URL + - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) + - `categories` (list): List of categories the video belongs to + - `tags` (list): List of tags assigned to the video + - `cast` (list): List of cast members + +All the fields in [Filtering Formats](#filtering-formats) can also be used + +Available for the video that belongs to some logical chapter or section: + + - `chapter` (string): Name or title of the chapter the video belongs to + - `chapter_number` (numeric): Number of the chapter the video belongs to + - `chapter_id` (string): Id of the chapter the video belongs to + +Available for the video that is an episode of some series or programme: + + - `series` (string): Title of the series or programme the video episode belongs to + - `series_id` (string): Id of the series or programme the video episode belongs to + - `season` (string): Title of the season the video episode belongs to + - `season_number` (numeric): Number of the season the video episode belongs to + - `season_id` (string): Id of the season the video episode belongs to + - `episode` (string): Title of the video episode + - `episode_number` (numeric): Number of the video episode within a season + - `episode_id` (string): Id of the video episode + +Available for the media that is a track or a part of a music album: + + - `track` (string): Title of the track + - `track_number` (numeric): Number of the track within an album or a disc + - `track_id` (string): Id of the track + - `artists` (list): Artist(s) of the track + - `artist` (string): Artist(s) of the track; comma-separated + - `genres` (list): Genre(s) of the track + - `genre` (string): Genre(s) of the track; comma-separated + - `composers` (list): Composer(s) of the piece + - `composer` (string): 
Composer(s) of the piece; comma-separated
+ - `album` (string): Title of the album the track belongs to
+ - `album_type` (string): Type of the album
+ - `album_artists` (list): All artists appearing on the album
+ - `album_artist` (string): All artists appearing on the album; comma-separated
+ - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
+
+Available only when using `--download-sections` and for the `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
+
+ - `section_title` (string): Title of the chapter
+ - `section_number` (numeric): Number of the chapter within the file
+ - `section_start` (numeric): Start time of the chapter in seconds
+ - `section_end` (numeric): End time of the chapter in seconds
+
+Available only when used in `--print`:
+
+ - `urls` (string): The URLs of all requested formats, one in each line
+ - `filename` (string): Name of the video file. Note that the [actual filename may differ](#outtmpl-postprocess-note)
+ - `formats_table` (table): The video format table as printed by `--list-formats`
+ - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
+ - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
+ - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`
+
+ Available only after the video is downloaded (`post_process`/`after_move`):
+
+ - `filepath`: Actual path of the downloaded video file
+
+Available only in `--sponsorblock-chapter-title`:
+
+ - `start_time` (numeric): Start time of the chapter in seconds
+ - `end_time` (numeric): End time of the chapter in seconds
+ - `categories` (list): The [SponsorBlock categories](https://wiki.sponsor.ajay.app/w/Types#Category) the chapter belongs to
+ - `category` (string): The smallest SponsorBlock category the chapter belongs to
+ - `category_names` (list): Friendly names of the categories
+ - `name` (string): Friendly name of the smallest category
+ - `type` (string): The [SponsorBlock action type](https://wiki.sponsor.ajay.app/w/Types#Action_Type) of the chapter
+
+Each aforementioned sequence, when referenced in an output template, will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
+
+**Note**: Some of the sequences are not guaranteed to be present, since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).
+
+**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
+
+For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with the view count padded with zeros up to 5 characters, like in `00042`.
+
+Output templates can also contain arbitrary hierarchical paths, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"`, which will result in downloading each video into a directory corresponding to this path template. Any missing directory will be automatically created for you.
+
+To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
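+
+For instance, the `section_*` fields listed above can drive the `chapter:` output template when splitting a video by its internal chapters (a sketch; the URL is a placeholder):
+
+```bash
+$ yt-dlp --split-chapters -o "chapter:%(title)s/%(section_number)02d - %(section_title)s.%(ext)s" "https://some/video"
+```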
+
+The current default template is `%(title)s [%(id)s].%(ext)s`.
+
+In some cases, you don't want special characters such as 中, spaces, or &, for example when transferring the downloaded filename to a Windows system or passing the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
+
+#### Output template examples
+
+```bash
+$ yt-dlp --print filename -o "test video.%(ext)s" BaW_jenozKc
+test video.webm    # Literal name with correct extension
+
+$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc
+youtube-dl test video ''_ä↭𝕐.webm    # All kinds of weird characters
+
+$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+youtube-dl_test_video_.webm    # Restricted file name
+
+# Download YouTube playlist videos in separate directory indexed by video order in a playlist
+$ yt-dlp -o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Download YouTube playlist videos in separate directories according to their uploaded year
+$ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
+
+# Prefix playlist index with " - " separator, but only if it is available
+$ yt-dlp -o "%(playlist_index&{} - |)s%(title)s.%(ext)s" BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"
+
+# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
+$ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists"
+
+# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
+$ yt-dlp -u user -p password -P "~/MyVideos" -o "%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s" "https://www.udemy.com/java-tutorial"
+
+# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
+$ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" "https://videomore.ru/kino_v_detalayah/5_sezon/367617"
+
+# Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext"
+# and put all temporary files in "C:\MyVideos\tmp"
+$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenoz --write-subs
+
+# Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext"
+$ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs
+
+# Stream the video being downloaded to stdout
+$ yt-dlp -o - BaW_jenozKc
+```
+
+# FORMAT SELECTION
+
+By default, yt-dlp tries to download the best available quality if you **don't** pass any options.
+This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audio streams are enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.
+
+**Deprecation warning**: The latest versions of yt-dlp can stream multiple formats to stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b` similar to normal downloads.
If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options.
+
+The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes the format or formats you would like to download.
+
+<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
+**tl;dr:** [navigate me to examples](#format-selection-examples).
+<!-- MANPAGE: END EXCLUDED SECTION -->
+
+The simplest case is requesting a specific format; e.g. with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
+
+You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
+
+You can use `-f -` to interactively provide the format selector *for each video*
+
+You can also use special names to select particular edge case formats:
+
+ - `all`: Select **all formats** separately
+ - `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
+ - `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (i.e. `vcodec!=none or acodec!=none`)
+ - `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
+ - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
+ - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
+ - `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]`
+ - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` ([Do not use!](https://github.com/yt-dlp/yt-dlp/issues/979#issuecomment-919629354))
+ - `w*`, `worst*`: Select the worst quality format that contains either a video or an audio
+ - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
+ - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]`
+ - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]`
+ - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
+ - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
+
+For example, to download the worst quality video-only format you can use `-f worstvideo`. It is, however, recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or, more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.
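+
+As a quick sketch of that advice (placeholder URL):
+
+```bash
+# Selects the format that is worst in *every* respect
+$ yt-dlp -f "worst" "https://some/video"
+
+# Usually what you actually want: the smallest file that still has video and audio
+$ yt-dlp -S "+size,+br,+res,+fps" "https://some/video"
+```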
+
+You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
+
+If you want to download multiple videos, and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left-hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
+
+If you want to download several formats of the same video, use a comma as a separator, e.g. `-f 22,17,18` will download all three of these formats, provided that they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
+
+You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg to be installed); e.g. `-f bestvideo+bestaudio` will download the best video-only format and the best audio-only format, and mux them together with ffmpeg.
+
+**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, it will be removed and multistreams will be enabled by default in the future. A new operator will instead be added to limit formats to single audio/video
+
+Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download only `best` while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
+
+## Filtering Formats
+
+You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`, since filters without a selector are interpreted as `best`).
+
+The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
+
+ - `filesize`: The number of bytes, if known in advance
+ - `filesize_approx`: An estimate for the number of bytes
+ - `width`: Width of the video, if known
+ - `height`: Height of the video, if known
+ - `aspect_ratio`: Aspect ratio of the video, if known
+ - `tbr`: Average bitrate of audio and video in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `asr`: Audio sampling rate in Hertz
+ - `fps`: Frame rate
+ - `audio_channels`: The number of audio channels
+ - `stretched_ratio`: `width:height` of the video's pixels, if not square
+
+Filtering also works with the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and the following string meta fields:
+
+ - `url`: Video URL
+ - `ext`: File extension
+ - `acodec`: Name of the audio codec in use
+ - `vcodec`: Name of the video codec in use
+ - `container`: Name of the container format
+ - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
+ - `language`: Language code
+ - `dynamic_range`: The dynamic range of the video
+ - `format_id`: A short description of the format
+ - `format`: A human-readable description of the format
+ - `format_note`: Additional info about the format
+ - `resolution`: Textual description of width and height
+
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
+
+**Note**: None of the aforementioned meta fields are guaranteed to be present, since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
+
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
+
+Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
+
+## Sorting Formats
+
+You can change the criteria for being considered the `best` by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`.
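+
+For example (a sketch using fields from the list below; placeholder URL):
+
+```bash
+# Prefer resolutions up to 720p, then higher framerate, then smaller filesize
+$ yt-dlp -S "res:720,fps,+size" "https://some/video"
+```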
+
+The available fields are:
+
+ - `hasvid`: Gives priority to formats that have a video stream
+ - `hasaud`: Gives priority to formats that have an audio stream
+ - `ie_pref`: The format preference
+ - `lang`: The language preference
+ - `quality`: The quality of the format
+ - `source`: The preference of the source
+ - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
+ - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
+ - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac4` > `eac3` > `ac3` > `dts` > other)
+ - `codec`: Equivalent to `vcodec,acodec`
+ - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred
+ - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
+ - `ext`: Equivalent to `vext,aext`
+ - `filesize`: Exact filesize, if known in advance
+ - `fs_approx`: Approximate filesize
+ - `size`: Exact filesize if available, otherwise approximate filesize
+ - `height`: Height of video
+ - `width`: Width of video
+ - `res`: Video resolution, calculated as the smallest dimension
+ - `fps`: Framerate of video
+ - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
+ - `channels`: The number of audio channels
+ - `tbr`: Total average bitrate in KBit/s
+ - `vbr`: Average video bitrate in KBit/s
+ - `abr`: Average audio bitrate in KBit/s
+ - `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
+ - `asr`: Audio sample rate in Hz
+
+**Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names.
+
+All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p, and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and the audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided ones by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with the filesize closest to 1 GiB.
+
+The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
+
+Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred.
These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats. + +If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. + +**Tip**: You can use the `-v -F` to see how the formats have been sorted (worst to best). + +## Format Selection examples + +```bash +# Download and merge the best video-only format and the best audio-only format, +# or download the best combined format if video-only format is not available +$ yt-dlp -f "bv+ba/b" + +# Download best format that contains video, +# and if it doesn't already have an audio stream, merge it with best audio-only format +$ yt-dlp -f "bv*+ba/b" + +# Same as above +$ yt-dlp + +# Download the best video-only format and the best audio-only format without merging them +# For this case, an output template should be used since +# by default, bestvideo and bestaudio will have the same file name. +$ yt-dlp -f "bv,ba" -o "%(title)s.f%(format_id)s.%(ext)s" + +# Download and merge the best format that has a video stream, +# and all audio-only formats into one file +$ yt-dlp -f "bv*+mergeall[vcodec=none]" --audio-multistreams + +# Download and merge the best format that has a video stream, +# and the best 2 audio-only formats into one file +$ yt-dlp -f "bv*+ba+ba.2" --audio-multistreams + + +# The following examples show the old method (without -S) of format selection +# and how to use -S to achieve a similar but (generally) better result + +# Download the worst video available (old method) +$ yt-dlp -f "wv*+wa/w" + +# Download the best video available but with the smallest resolution +$ yt-dlp -S "+res" + +# Download the smallest video available +$ yt-dlp -S "+size,+br" + + + +# Download the best mp4 video available, or the best video if no mp4 available +$ yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b" + +# Download the best video with the best extension +# (For video, mp4 > mov > webm > flv. For audio, m4a > aac > mp3 ...) +$ yt-dlp -S "ext" + + + +# Download the best video available but no better than 480p, +# or the worst video if there is no video under 480p +$ yt-dlp -f "bv*[height<=480]+ba/b[height<=480] / wv*+ba/w" + +# Download the best video available with the largest height but no better than 480p, +# or the best video with the smallest resolution if there is no video under 480p +$ yt-dlp -S "height:480" + +# Download the best video available with the largest resolution but no better than 480p, +# or the best video with the smallest resolution if there is no video under 480p +# Resolution is determined by using the smallest dimension. 
+# So this works correctly for vertical videos as well
+$ yt-dlp -S "res:480"
+
+
+
+# Download the best video (that also has audio) but no bigger than 50 MB,
+# or the worst video (that also has audio) if there is no video under 50 MB
+$ yt-dlp -f "b[filesize<50M] / w"
+
+# Download largest video (that also has audio) but no bigger than 50 MB,
+# or the smallest video (that also has audio) if there is no video under 50 MB
+$ yt-dlp -f "b" -S "filesize:50M"
+
+# Download best video (that also has audio) that is closest in size to 50 MB
+$ yt-dlp -f "b" -S "filesize~50M"
+
+
+
+# Download best video available via direct link over HTTP/HTTPS protocol,
+# or the best video available via any protocol if there is no such video
+$ yt-dlp -f "(bv*+ba/b)[protocol^=http][protocol!*=dash] / (bv*+ba/b)"
+
+# Download best video available via the best protocol
+# (https/ftps > http/ftp > m3u8_native > m3u8 > http_dash_segments ...)
+$ yt-dlp -S "proto"
+
+
+
+# Download the best video with either h264 or h265 codec,
+# or the best video if there is no such video
+$ yt-dlp -f "(bv*[vcodec~='^((he|a)vc|h26[45])']+ba) / (bv*+ba/b)"
+
+# Download the best video with best codec no better than h264,
+# or the best video with worst codec if there is no such video
+$ yt-dlp -S "codec:h264"
+
+# Download the best video with worst codec no worse than h264,
+# or the best video with best codec if there is no such video
+$ yt-dlp -S "+codec:h264"
+
+
+
+# More complex examples
+
+# Download the best video no better than 720p preferring framerate greater than 30,
+# or the worst video (still preferring framerate greater than 30) if there is no such video
+$ yt-dlp -f "((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)"
+
+# Download the video with the largest resolution no better than 720p,
+# or the video with the smallest resolution available if there is no such video,
+# preferring larger framerate for formats with the same resolution
+$ yt-dlp -S "res:720,fps"
+
+
+
+# Download the video with the smallest resolution no worse than 480p,
+# or the video with the largest resolution available if there is no such video,
+# preferring better codec and then larger total bitrate for the same resolution
+$ yt-dlp -S "+res:480,codec,br"
+```
+
+# MODIFYING METADATA
+
+The metadata obtained by the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata`.
+
+`--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.
+
+The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
+
+Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa.
Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. + +This option also has a few special uses: + +* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description + +* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values. + +**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes. + +For reference, these are the fields yt-dlp adds by default to the file metadata: + +Metadata fields | From +:--------------------------|:------------------------------------------------ +`title` | `track` or `title` +`date` | `upload_date` +`description`, `synopsis` | `description` +`purl`, `comment` | `webpage_url` +`track` | `track_number` +`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id` +`composer` | `composer` or `composers` +`genre` | `genre` or `genres` +`album` | `album` +`album_artist` | `album_artist` or `album_artists` +`disc` | `disc_number` +`show` | `series` +`season_number` | `season_number` +`episode_id` | `episode` or `episode_id` +`episode_sort` | `episode_number` +`language` of each stream | the format's `language` + +**Note**: The file format may not support some of these fields + + +## Modifying metadata examples + +```bash +# Interpret the title as "Artist - Title" +$ yt-dlp --parse-metadata "title:%(artist)s - %(title)s" + +# Regex example +$ yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)" + +# Set title as "Series name S01E05" +$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s" + +# Prioritize uploader as the "artist" field in video metadata +$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --embed-metadata + +# Set "comment" field in video metadata using description instead of webpage_url, +# handling multiple lines correctly +$ yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" --embed-metadata + +# Do not set any "synopsis" in the video metadata +$ yt-dlp --parse-metadata ":(?P<meta_synopsis>)" + +# Remove "formats" field from the infojson by setting it to an empty string +$ yt-dlp --parse-metadata "video::(?P<formats>)" --write-info-json + +# Replace all spaces and "_" in title and uploader with a `-` +$ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" + +``` + +# EXTRACTOR ARGUMENTS + +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. 
`--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"` + +Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client"` becomes `youtube:player_client"` + +The following extractors use this feature: + +#### youtube +* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes +* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. +* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details +* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. +* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) +* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` + * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total +* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) +* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others +* `innertube_key`: Innertube API key to use for all API requests +* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning + +#### youtubetab (YouTube playlists, channels, feeds, etc.) +* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) +* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. 
This may cause date-based filters to be slightly off + +#### generic +* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg +* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE` +* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist +* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` + +#### funimation +* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` +* `version`: The video version to extract - `uncut` or `simulcast` + +#### crunchyrollbeta (Crunchyroll) +* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2` +* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None` + +#### vikichannel +* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers` + +#### niconico +* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.** + +#### youtubewebarchive +* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures` + +#### gamejolt +* `comment_sort`: `hot` (default), `you` (cookies needed), `top`, `new` - choose comment sorting mode (on GameJolt's side) + +#### hotstar +* `res`: resolution to ignore - one or more of `sd`, `hd`, `fhd` +* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265` +* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` + +#### niconicochannelplus +* `max_comments`: Maximum number of comments to extract - default is `120` + +#### tiktok +* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com` +* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1` +* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221` + +#### rokfinchannel +* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` + +#### twitter +* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in + +#### stacommu, wrestleuniverse +* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage + +#### twitch +* `client_id`: Client ID value to be sent with GraphQL requests, e.g. `twitch:client_id=kimne78kx3ncx6brgo4mv6wki5h1ko` + +#### nhkradirulive (NHK らじる★らじる LIVE) +* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. 
Defaults to `tokyo`
+
+#### nflplusreplay
+* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
+
+#### jiosaavn
+* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
+
+**Note**: These options may be changed/removed in the future without concern for backward compatibility
+
+<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
+
+
+# PLUGINS
+
+Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!**
+
+Plugins can be of `<type>`s `extractor` or `postprocessor`.
+- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for them.
+- Extractor plugins take priority over built-in extractors.
+- Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
+
+
+Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`.
+
+In other words, the file structure on the disk looks something like:
+
+        yt_dlp_plugins/
+            extractor/
+                myplugin.py
+            postprocessor/
+                myplugin.py
+
+yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
+
+See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)
+
+## Installing Plugins
+
+Plugins can be installed using various methods and locations.
+
+1. **Configuration directories**:
+    Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration):
+    * **User Plugins**
+        * `${XDG_CONFIG_HOME}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Linux/macOS)
+        * `${XDG_CONFIG_HOME}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+        * `${APPDATA}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Windows)
+        * `${APPDATA}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+        * `~/.yt-dlp/plugins/<package name>/yt_dlp_plugins/`
+        * `~/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+    * **System Plugins**
+        * `/etc/yt-dlp/plugins/<package name>/yt_dlp_plugins/`
+        * `/etc/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location (recommended for portable installations):
+    * Binary: where `<root-dir>/yt-dlp.exe`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+    * Source: where `<root-dir>/yt_dlp/__main__.py`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
+
+3. **pip and other locations in `PYTHONPATH`**
+    * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
+    * Note: plugin files between plugin packages installed with pip must have unique filenames.
+    * Any path in `PYTHONPATH` is searched for the `yt_dlp_plugins` namespace folder.
+    * Note: This does not apply to Pyinstaller/py2exe builds.
+
+
+`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.
+* e.g.
`${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins/<type>/myplugin.py`
+
+Run yt-dlp with `--verbose` to check if the plugin has been loaded.
+
+## Developing Plugins
+
+See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide.
+
+All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefixes (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`).
+
+To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
+
+If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability.
+
+See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor.
+
+# EMBEDDING YT-DLP
+
+yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
+
+Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc. to create console output that you can reliably reproduce and parse.
+
+From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
+
+```python
+from yt_dlp import YoutubeDL
+
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
+with YoutubeDL() as ydl:
+    ydl.download(URLS)
+```
+
+Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L183) or `help(yt_dlp.YoutubeDL)` in a Python shell. If you are already familiar with the CLI, you can use [`devscripts/cli_to_api.py`](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) to translate any CLI switches to `YoutubeDL` params.
+
+**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary.
It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information) + +## Embedding examples + +#### Extracting information + +```python +import json +import yt_dlp + +URL = 'https://www.youtube.com/watch?v=BaW_jenozKc' + +# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions +ydl_opts = {} +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(URL, download=False) + + # ℹ️ ydl.sanitize_info makes the info json-serializable + print(json.dumps(ydl.sanitize_info(info))) +``` +#### Download using an info-json + +```python +import yt_dlp + +INFO_FILE = 'path/to/video.info.json' + +with yt_dlp.YoutubeDL() as ydl: + error_code = ydl.download_with_info_file(INFO_FILE) + +print('Some videos failed to download' if error_code + else 'All videos successfully downloaded') +``` + +#### Extract audio + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +ydl_opts = { + 'format': 'm4a/bestaudio/best', + # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments + 'postprocessors': [{ # Extract audio using ffmpeg + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'm4a', + }] +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + error_code = ydl.download(URLS) +``` + +#### Filter videos + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +def longer_than_a_minute(info, *, incomplete): + """Download only videos longer than a minute (or with unknown duration)""" + duration = info.get('duration') + if duration and duration < 60: + return 'The video is too short' + +ydl_opts = { + 'match_filter': longer_than_a_minute, +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + error_code = ydl.download(URLS) +``` + +#### Adding logger and progress hook + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +class MyLogger: + def debug(self, msg): + # For compatibility with youtube-dl, both debug and info are passed into debug + # You can distinguish them by the prefix '[debug] ' + if msg.startswith('[debug] '): + pass + else: + self.info(msg) + + def info(self, msg): + pass + + def warning(self, msg): + pass + + def error(self, msg): + print(msg) + + +# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL) +def my_hook(d): + if d['status'] == 'finished': + print('Done downloading, now post-processing ...') + + +ydl_opts = { + 'logger': MyLogger(), + 'progress_hooks': [my_hook], +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download(URLS) +``` + +#### Add a custom PostProcessor + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +# ℹ️ See help(yt_dlp.postprocessor.PostProcessor) +class MyCustomPP(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + self.to_screen('Doing stuff') + return [], info + + +with yt_dlp.YoutubeDL() as ydl: + # ℹ️ "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN + ydl.add_post_processor(MyCustomPP(), when='pre_process') + ydl.download(URLS) +``` + + +#### Use a custom format selector + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +def format_selector(ctx): + """ Select the best video and the best audio that won't result in an mkv. 
+    NOTE: This is just an example and does not handle all cases """
+
+    # formats are already sorted worst to best
+    formats = ctx.get('formats')[::-1]
+
+    # acodec='none' means there is no audio
+    best_video = next(f for f in formats
+                      if f['vcodec'] != 'none' and f['acodec'] == 'none')
+
+    # find compatible audio extension
+    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
+    # vcodec='none' means there is no video
+    best_audio = next(f for f in formats if (
+        f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))
+
+    # These are the minimum required fields for a merged format
+    yield {
+        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
+        'ext': best_video['ext'],
+        'requested_formats': [best_video, best_audio],
+        # Must be + separated list of protocols
+        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
+    }
+
+
+ydl_opts = {
+    'format': format_selector,
+}
+
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(URLS)
+```
+
+
+# CHANGES FROM YOUTUBE-DL
+
+### New features
+
+* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
+
+* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
+
+* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs are now preferred instead of simply a larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
+
+* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, embedding thumbnails in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
+
+* **YouTube improvements**:
+    * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
+    * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
+    * Supports some (but not all) age-gated content without cookies
+    * Download livestreams from the start using `--live-from-start` (*experimental*)
+    * Channel URLs download all uploads of the channel, including shorts and live
+
+* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
+
+* **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections`
+
+* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
+
+* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use the `--concurrent-fragments` (`-N`) option to set the number of threads used
+* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH (mpd) and HLS (m3u8) formats
+
+* **New and fixed extractors**: Many new extractors have been added and a lot of existing ones have been fixed. See the [changelog](Changelog.md) or the [list of supported sites](supportedsites.md)
+
+* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN etc.
+
+* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
+
+* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)
+
+* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [CONFIGURATION](#configuration) for details
+
+* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
+
+* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc.
+
+* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merging multiple video/audio streams, multiple `--config-locations`, `--exec` at different stages, etc.
+
+* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
+
+* **Self updater**: The releases can be updated using `yt-dlp -U`, and downgraded using `--update-to` if required
+
+* **Automated builds**: [Nightly/master builds](#update-channels) can be used with `--update-to nightly` and `--update-to master`
+
+See the [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes
+
+Features marked with a **\*** have been back-ported to youtube-dl
+
+### Differences in default behavior
+
+Some of yt-dlp's default options are different from those of youtube-dl and youtube-dlc:
+
+* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743)
+* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`) no longer work. See [removed options](#Removed) for details
+* `avconv` is not supported as an alternative to `ffmpeg`
+* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
+* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change.
This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename` +* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order +* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this +* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both +* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead +* When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files +* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this +* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this +* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior +* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this +* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading +* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections +* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this +* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date. +* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. 
Use `--compat-options no-direct-merge` to revert this
+* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
+* Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
+* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
+* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
+* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
+* ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~
+* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
+* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
+* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to use the legacy http handler (`urllib`) for standard http requests.
+* The sub-modules `swfinterp` and `casefold` are removed.
+
+For ease of use, a few more compat options are available:
+
+* `--compat-options all`: Use all compat options (Do NOT use)
+* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
+* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
+* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
+* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
+* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
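+
+When [embedding yt-dlp](#embedding-yt-dlp), the same switches can be passed through the `compat_opts` parameter; note that only a subset of them has an effect via the API. A minimal sketch (the option names are the same strings as on the CLI):
+
+```python
+import yt_dlp
+
+# Sketch: use youtube-dl's format sorting and skip live chat,
+# roughly equivalent to `--compat-options format-sort,no-live-chat` on the CLI
+ydl_opts = {'compat_opts': ['format-sort', 'no-live-chat']}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```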
+
+### Deprecated options
+
+These are all the deprecated options and the current alternatives to achieve the same effect
+
+#### Almost redundant options
+While these options are almost the same as their new counterparts, there are some differences that prevent them from being redundant
+
+    -j, --dump-json                  --print "%()j"
+    -F, --list-formats               --print formats_table
+    --list-thumbnails                --print thumbnails_table --print playlist:thumbnails_table
+    --list-subs                      --print automatic_captions_table --print subtitles_table
+
+#### Redundant options
+While these options are redundant, they are still expected to be used due to their ease of use
+
+    --get-description                --print description
+    --get-duration                   --print duration_string
+    --get-filename                   --print filename
+    --get-format                     --print format
+    --get-id                         --print id
+    --get-thumbnail                  --print thumbnail
+    -e, --get-title                  --print title
+    -g, --get-url                    --print urls
+    --match-title REGEX              --match-filter "title ~= (?i)REGEX"
+    --reject-title REGEX             --match-filter "title !~= (?i)REGEX"
+    --min-views COUNT                --match-filter "view_count >=? COUNT"
+    --max-views COUNT                --match-filter "view_count <=? COUNT"
+    --break-on-reject                Use --break-match-filter
+    --user-agent UA                  --add-header "User-Agent:UA"
+    --referer URL                    --add-header "Referer:URL"
+    --playlist-start NUMBER          -I NUMBER:
+    --playlist-end NUMBER            -I :NUMBER
+    --playlist-reverse               -I ::-1
+    --no-playlist-reverse            Default
+    --no-colors                      --color no_color
+
+#### Not recommended
+While these options still work, their use is not recommended since there are other alternatives to achieve the same effect
+
+    --force-generic-extractor        --ies generic,default
+    --exec-before-download CMD       --exec "before_dl:CMD"
+    --no-exec-before-download        --no-exec
+    --all-formats                    -f all
+    --all-subs                       --sub-langs all --write-subs
+    --print-json                     -j --no-simulate
+    --autonumber-size NUMBER         Use string formatting, e.g.
%(autonumber)03d + --autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s + --id -o "%(id)s.%(ext)s" + --metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT" + --hls-prefer-native --downloader "m3u8:native" + --hls-prefer-ffmpeg --downloader "m3u8:ffmpeg" + --list-formats-old --compat-options list-formats (Alias: --no-list-formats-as-table) + --list-formats-as-table --compat-options -list-formats [Default] (Alias: --no-list-formats-old) + --youtube-skip-dash-manifest --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest) + --youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest) + --youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest) + --youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest) + --geo-bypass --xff "default" + --no-geo-bypass --xff "never" + --geo-bypass-country CODE --xff CODE + --geo-bypass-ip-block IP_BLOCK --xff IP_BLOCK + +#### Developer options +These options are not intended to be used by the end-user + + --test Download only part of video for testing extractors + --load-pages Load pages dumped by --write-pages + --youtube-print-sig-code For testing youtube signatures + --allow-unplayable-formats List unplayable formats also + --no-allow-unplayable-formats Default + +#### Old aliases +These are aliases that are no longer documented for various reasons + + --avconv-location --ffmpeg-location + --clean-infojson --clean-info-json + --cn-verification-proxy URL --geo-verification-proxy URL + --dump-headers --print-traffic + --dump-intermediate-pages --dump-pages + --force-write-download-archive --force-write-archive + --load-info --load-info-json + --no-clean-infojson --no-clean-info-json + --no-split-tracks --no-split-chapters + --no-write-srt --no-write-subs + --prefer-unsecure --prefer-insecure + --rate-limit RATE --limit-rate RATE + --split-tracks --split-chapters + --srt-lang LANGS --sub-langs LANGS + --trim-file-names LENGTH --trim-filenames LENGTH + --write-srt --write-subs + --yes-overwrites --force-overwrites + +#### Sponskrub Options +Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of the `--sponsorblock` options + + --sponskrub --sponsorblock-mark all + --no-sponskrub --no-sponsorblock + --sponskrub-cut --sponsorblock-remove all + --no-sponskrub-cut --sponsorblock-remove -all + --sponskrub-force Not applicable + --no-sponskrub-force Not applicable + --sponskrub-location Not applicable + --sponskrub-args Not applicable + +#### No longer supported +These options may no longer work as intended + + --prefer-avconv avconv is not officially supported by yt-dlp (Alias: --no-prefer-ffmpeg) + --prefer-ffmpeg Default (Alias: --no-prefer-avconv) + -C, --call-home Not implemented + --no-call-home Default + --include-ads No longer supported + --no-include-ads Default + --write-annotations No supported site has annotations now + --no-write-annotations Default + --compat-options seperate-video-versions No longer needed + +#### Removed +These options were deprecated since 2014 and have now been entirely removed + + -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s" + -t, -l, --title, --literal -o "%(title)s-%(id)s.%(ext)s" + + +# CONTRIBUTING +See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions) + +# WIKI +See the 
[Wiki](https://github.com/yt-dlp/yt-dlp/wiki) for more information diff --git a/bundle/__init__.py b/bundle/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bundle/py2exe.py b/bundle/py2exe.py new file mode 100755 index 0000000..ccb52ea --- /dev/null +++ b/bundle/py2exe.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 + +# Allow execution from anywhere +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import warnings + +from py2exe import freeze + +from devscripts.utils import read_version + +VERSION = read_version() + + +def main(): + warnings.warn( + 'py2exe builds do not support pycryptodomex and needs VC++14 to run. ' + 'It is recommended to run "pyinst.py" to build using pyinstaller instead') + + freeze( + console=[{ + 'script': './yt_dlp/__main__.py', + 'dest_base': 'yt-dlp', + 'icon_resources': [(1, 'devscripts/logo.ico')], + }], + version_info={ + 'version': VERSION, + 'description': 'A youtube-dl fork with additional features and patches', + 'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>', + 'product_name': 'yt-dlp', + 'product_version': VERSION, + }, + options={ + 'bundle_files': 0, + 'compressed': 1, + 'optimize': 2, + 'dist_dir': './dist', + 'excludes': [ + # py2exe cannot import Crypto + 'Crypto', + 'Cryptodome', + # py2exe appears to confuse this with our socks library. + # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. + 'urllib3.contrib.socks' + ], + 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], + # Modules that are only imported dynamically must be added here + 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated', + 'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'], + }, + zipfile=None, + ) + + +if __name__ == '__main__': + main() diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py new file mode 100755 index 0000000..db9dbfd --- /dev/null +++ b/bundle/pyinstaller.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import platform + +from PyInstaller.__main__ import run as run_pyinstaller + +from devscripts.utils import read_version + +OS_NAME, MACHINE, ARCH = sys.platform, platform.machine().lower(), platform.architecture()[0][:2] +if MACHINE in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): + MACHINE = 'x86' if ARCH == '32' else '' + + +def main(): + opts, version = parse_options(), read_version() + + onedir = '--onedir' in opts or '-D' in opts + if not onedir and '-F' not in opts and '--onefile' not in opts: + opts.append('--onefile') + + name, final_file = exe(onedir) + print(f'Building yt-dlp v{version} for {OS_NAME} {platform.machine()} with options {opts}') + print('Remember to update the version using "devscripts/update-version.py"') + if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'): + print('WARNING: Building without lazy_extractors. 
Run ' + '"devscripts/make_lazy_extractors.py" to build lazy extractors', file=sys.stderr) + print(f'Destination: {final_file}\n') + + opts = [ + f'--name={name}', + '--icon=devscripts/logo.ico', + '--upx-exclude=vcruntime140.dll', + '--noconfirm', + '--additional-hooks-dir=yt_dlp/__pyinstaller', + *opts, + 'yt_dlp/__main__.py', + ] + + print(f'Running PyInstaller with {opts}') + run_pyinstaller(opts) + set_version_info(final_file, version) + + +def parse_options(): + # Compatibility with older arguments + opts = sys.argv[1:] + if opts[0:1] in (['32'], ['64']): + if ARCH != opts[0]: + raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system') + opts = opts[1:] + return opts + + +def exe(onedir): + """@returns (name, path)""" + name = '_'.join(filter(None, ( + 'yt-dlp', + {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), + MACHINE, + ))) + return name, ''.join(filter(None, ( + 'dist/', + onedir and f'{name}/', + name, + OS_NAME == 'win32' and '.exe' + ))) + + +def version_to_list(version): + version_list = version.split('.') + return list(map(int, version_list)) + [0] * (4 - len(version_list)) + + +def set_version_info(exe, version): + if OS_NAME == 'win32': + windows_set_version(exe, version) + + +def windows_set_version(exe, version): + from PyInstaller.utils.win32.versioninfo import ( + FixedFileInfo, + StringFileInfo, + StringStruct, + StringTable, + VarFileInfo, + VarStruct, + VSVersionInfo, + ) + + try: + from PyInstaller.utils.win32.versioninfo import SetVersion + except ImportError: # Pyinstaller >= 5.8 + from PyInstaller.utils.win32.versioninfo import write_version_info_to_executable as SetVersion + + version_list = version_to_list(version) + suffix = MACHINE and f'_{MACHINE}' + SetVersion(exe, VSVersionInfo( + ffi=FixedFileInfo( + filevers=version_list, + prodvers=version_list, + mask=0x3F, + flags=0x0, + OS=0x4, + fileType=0x1, + subtype=0x0, + date=(0, 0), + ), + kids=[ + StringFileInfo([StringTable('040904B0', [ + StringStruct('Comments', 'yt-dlp%s Command Line Interface' % suffix), + StringStruct('CompanyName', 'https://github.com/yt-dlp'), + StringStruct('FileDescription', 'yt-dlp%s' % (MACHINE and f' ({MACHINE})')), + StringStruct('FileVersion', version), + StringStruct('InternalName', f'yt-dlp{suffix}'), + StringStruct('LegalCopyright', 'pukkandan.ytdlp@gmail.com | UNLICENSE'), + StringStruct('OriginalFilename', f'yt-dlp{suffix}.exe'), + StringStruct('ProductName', f'yt-dlp{suffix}'), + StringStruct( + 'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'), + ])]), VarFileInfo([VarStruct('Translation', [0, 1200])]) + ] + )) + + +if __name__ == '__main__': + main() diff --git a/devscripts/__init__.py b/devscripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in new file mode 100644 index 0000000..21f5279 --- /dev/null +++ b/devscripts/bash-completion.in @@ -0,0 +1,29 @@ +__yt_dlp() +{ + local cur prev opts fileopts diropts keywords + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + opts="{{flags}}" + keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" + fileopts="-a|--batch-file|--download-archive|--cookies|--load-info" + diropts="--cache-dir" + + if [[ ${prev} =~ ${fileopts} ]]; then + COMPREPLY=( $(compgen -f -- ${cur}) ) + return 0 + elif [[ ${prev} =~ ${diropts} ]]; then + COMPREPLY=( $(compgen -d -- ${cur}) ) + return 0 + fi + + if [[ ${cur} =~ : ]]; then + COMPREPLY=( 
$(compgen -W "${keywords}" -- ${cur}) ) + return 0 + elif [[ ${cur} == * ]] ; then + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + fi +} + +complete -F __yt_dlp yt-dlp diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py new file mode 100755 index 0000000..9b4a9d4 --- /dev/null +++ b/devscripts/bash-completion.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import yt_dlp + +BASH_COMPLETION_FILE = "completions/bash/yt-dlp" +BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" + + +def build_completion(opt_parser): + opts_flag = [] + for group in opt_parser.option_groups: + for option in group.option_list: + # for every long flag + opts_flag.append(option.get_opt_string()) + with open(BASH_COMPLETION_TEMPLATE) as f: + template = f.read() + with open(BASH_COMPLETION_FILE, "w") as f: + # just using the special char + filled_template = template.replace("{{flags}}", " ".join(opts_flag)) + f.write(filled_template) + + +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] +build_completion(parser) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json new file mode 100644 index 0000000..2a34ad0 --- /dev/null +++ b/devscripts/changelog_override.json @@ -0,0 +1,130 @@ +[ + { + "action": "add", + "when": "29cb20bd563c02671b31dd840139e93dd37150a1", + "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`" + }, + { + "action": "add", + "when": "5038f6d713303e0967d002216e7a88652401c22a", + "short": "[priority] **YouTube throttling fixes!**" + }, + { + "action": "remove", + "when": "2e023649ea4e11151545a34dc1360c114981a236" + }, + { + "action": "add", + "when": "01aba2519a0884ef17d5f85608dbd2a455577147", + "short": "[priority] YouTube: Improved throttling and signature fixes" + }, + { + "action": "change", + "when": "c86e433c35fe5da6cb29f3539eef97497f84ed38", + "short": "[extractor/niconico:series] Fix extraction (#6898)", + "authors": ["sqrtNOT"] + }, + { + "action": "change", + "when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2", + "short": "[extractor/youtube:music_search_url] Extract title (#7102)", + "authors": ["kangalio"] + }, + { + "action": "change", + "when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb", + "short": "Add option `--color` (#6904)", + "authors": ["Grub4K"] + }, + { + "action": "change", + "when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56", + "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "1e75d97db21152acc764b30a688e516f04b8a142", + "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are 
not labeled as such", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "f2ff0f6f1914b82d4a51681a72cc0828115dcb4a", + "short": "[extractor/motherless] Add gallery support, fix groups (#7211)", + "authors": ["rexlambert22", "Ti4eeT4e"] + }, + { + "action": "change", + "when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700", + "short": "[misc] Revert \"Add automatic duplicate issue detection\"", + "authors": ["pukkandan"] + }, + { + "action": "add", + "when": "1ceb657bdd254ad961489e5060f2ccc7d556b729", + "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`" + }, + { + "action": "change", + "when": "b03fa7834579a01cc5fba48c0e73488a16683d48", + "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f", + "short": "[test] Add tests for socks proxies (#7908)", + "authors": ["coletdjnz"] + }, + { + "action": "change", + "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76", + "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)", + "authors": ["coletdjnz"] + }, + { + "action": "change", + "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85", + "short": "[rh:urllib] Simplify gzip decoding (#7611)", + "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", + "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" + }, + { + "action": "add", + "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391", + "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands." + }, + { + "action": "change", + "when": "8a8b54523addf46dfd50ef599761a81bc22362e6", + "short": "[rh:requests] Add handler for `requests` HTTP library (#3668)\n\n\tAdds support for HTTPS proxies and persistent connections (keep-alive)", + "authors": ["bashonly", "coletdjnz", "Grub4K"] + }, + { + "action": "add", + "when": "1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa", + "short": "[priority] **The release channels have been adjusted!**\n\t* [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.\n\t* [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes." 
+ }, + { + "action": "add", + "when": "f04b5bedad7b281bee9814686bba1762bae092eb", + "short": "[priority] Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)\n\t- Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers" + }, + { + "action": "change", + "when": "15f22b4880b6b3f71f350c64d70976ae65b9f1ca", + "short": "[webvtt] Allow spaces before newlines for CueBlock (#7681)", + "authors": ["TSRBerry"] + }, + { + "action": "change", + "when": "4ce57d3b873c2887814cbec03d029533e82f7db5", + "short": "[ie] Support multi-period MPD streams (#6654)", + "authors": ["alard", "pukkandan"] + } +] diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json new file mode 100644 index 0000000..9bd747b --- /dev/null +++ b/devscripts/changelog_override.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft/2020-12/schema", + "type": "array", + "uniqueItems": true, + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "action": { + "enum": [ + "add" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "short" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "remove" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + } + }, + "required": [ + "action", + "hash" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "change" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "hash", + "short", + "authors" + ] + } + ] + } +} diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py new file mode 100644 index 0000000..fc72c30 --- /dev/null +++ b/devscripts/check-porn.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +""" +This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check +if we are not 'age_limit' tagging some porn site + +A second approach implemented relies on a list of porn domains, to activate it +pass the list filename as the only argument +""" + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import urllib.parse +import urllib.request + +from test.helper import gettestcases + +if len(sys.argv) > 1: + METHOD = 'LIST' + LIST = open(sys.argv[1]).read().decode('utf8').strip() +else: + METHOD = 'EURISTIC' + +for test in gettestcases(): + if METHOD == 'EURISTIC': + try: + webpage = urllib.request.urlopen(test['url'], timeout=10).read() + except Exception: + print('\nFail: {}'.format(test['name'])) + continue + + webpage = webpage.decode('utf8', 'replace') + + RESULT = 'porn' in webpage.lower() + + elif METHOD == 'LIST': + domain = urllib.parse.urlparse(test['url']).netloc + if not domain: + 
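            # a test whose URL has no hostname cannot be checked against the domain list +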
print('\nFail: {}'.format(test['name'])) + continue + domain = '.'.join(domain.split('.')[-2:]) + + RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) + + if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] + or test['info_dict']['age_limit'] != 18): + print('\nPotential missing age_limit check: {}'.format(test['name'])) + + elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] + and test['info_dict']['age_limit'] == 18): + print('\nPotential false negative: {}'.format(test['name'])) + + else: + sys.stdout.write('.') + sys.stdout.flush() + +print() diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py new file mode 100644 index 0000000..2aa51eb --- /dev/null +++ b/devscripts/cli_to_api.py @@ -0,0 +1,48 @@ +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import yt_dlp +import yt_dlp.options + +create_parser = yt_dlp.options.create_parser + + +def parse_patched_options(opts): + patched_parser = create_parser() + patched_parser.defaults.update({ + 'ignoreerrors': False, + 'retries': 0, + 'fragment_retries': 0, + 'extract_flat': False, + 'concat_playlist': 'never', + }) + yt_dlp.options.create_parser = lambda: patched_parser + try: + return yt_dlp.parse_options(opts) + finally: + yt_dlp.options.create_parser = create_parser + + +default_opts = parse_patched_options([]).ydl_opts + + +def cli_to_api(opts, cli_defaults=False): + opts = (yt_dlp.parse_options if cli_defaults else parse_patched_options)(opts).ydl_opts + + diff = {k: v for k, v in opts.items() if default_opts[k] != v} + if 'postprocessors' in diff: + diff['postprocessors'] = [pp for pp in diff['postprocessors'] + if pp not in default_opts['postprocessors']] + return diff + + +if __name__ == '__main__': + from pprint import pprint + + print('\nThe arguments passed translate to:\n') + pprint(cli_to_api(sys.argv[1:])) + print('\nCombining these with the CLI defaults gives:\n') + pprint(cli_to_api(sys.argv[1:], True)) diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in new file mode 100644 index 0000000..32938fb --- /dev/null +++ b/devscripts/fish-completion.in @@ -0,0 +1,5 @@ + +{{commands}} + + +complete --command yt-dlp --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py new file mode 100755 index 0000000..5d2f68a --- /dev/null +++ b/devscripts/fish-completion.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import optparse + +import yt_dlp +from yt_dlp.utils import shell_quote + +FISH_COMPLETION_FILE = 'completions/fish/yt-dlp.fish' +FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' + +EXTRA_ARGS = { + 'remux-video': ['--arguments', 'mp4 mkv', '--exclusive'], + 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'], + + # Options that need a file parameter + 'download-archive': ['--require-parameter'], + 'cookies': ['--require-parameter'], + 'load-info': ['--require-parameter'], + 'batch-file': ['--require-parameter'], +} + + +def build_completion(opt_parser): + commands = [] + + for group in opt_parser.option_groups: + for option in group.option_list: + long_option = option.get_opt_string().strip('-') + complete_cmd = ['complete', '--command', 'yt-dlp', '--long-option', long_option] + 
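            # each option maps to one fish `complete` command; the short flag, description and any EXTRA_ARGS are appended before quoting +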
if option._short_opts: + complete_cmd += ['--short-option', option._short_opts[0].strip('-')] + if option.help != optparse.SUPPRESS_HELP: + complete_cmd += ['--description', option.help] + complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) + commands.append(shell_quote(complete_cmd)) + + with open(FISH_COMPLETION_TEMPLATE) as f: + template = f.read() + filled_template = template.replace('{{commands}}', '\n'.join(commands)) + with open(FISH_COMPLETION_FILE, 'w') as f: + f.write(filled_template) + + +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] +build_completion(parser) diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py new file mode 100644 index 0000000..7f3c88b --- /dev/null +++ b/devscripts/generate_aes_testdata.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import codecs +import subprocess + +from yt_dlp.aes import aes_encrypt, key_expansion +from yt_dlp.utils import intlist_to_bytes + +secret_msg = b'Secret message goes here' + + +def hex_str(int_list): + return codecs.encode(intlist_to_bytes(int_list), 'hex') + + +def openssl_encode(algo, key, iv): + cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)] + prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + out, _ = prog.communicate(secret_msg) + return out + + +iv = key = [0x20, 0x15] + 14 * [0] + +r = openssl_encode('aes-128-cbc', key, iv) +print('aes_cbc_decrypt') +print(repr(r)) + +password = key +new_key = aes_encrypt(password, key_expansion(password)) +r = openssl_encode('aes-128-ctr', new_key, iv) +print('aes_decrypt_text 16') +print(repr(r)) + +password = key + 16 * [0] +new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16) +r = openssl_encode('aes-256-ctr', new_key, iv) +print('aes_decrypt_text 32') +print(repr(r)) diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py new file mode 100755 index 0000000..889d9ab --- /dev/null +++ b/devscripts/install_deps.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +# Allow execution from anywhere +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import argparse +import re +import subprocess + +from devscripts.tomlparse import parse_toml +from devscripts.utils import read_file + + +def parse_args(): + parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp') + parser.add_argument( + 'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)') + parser.add_argument( + '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency') + parser.add_argument( + '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group') + parser.add_argument( + '-o', '--only-optional', action='store_true', help='Only install optional dependencies') + parser.add_argument( + '-p', '--print', action='store_true', help='Only print a requirements.txt to stdout') + parser.add_argument( + '-u', '--user', action='store_true', help='Install with pip as --user') + return parser.parse_args() + + +def main(): + args = parse_args() + project_table = parse_toml(read_file(args.input))['project'] + optional_groups = project_table['optional-dependencies'] + excludes = args.exclude or [] + + deps = [] + if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' 
group + deps.extend(project_table['dependencies']) + if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group + deps.extend(optional_groups['default']) + + def name(dependency): + return re.match(r'[\w-]+', dependency)[0].lower() + + target_map = {name(dep): dep for dep in deps} + + for include in filter(None, map(optional_groups.get, args.include or [])): + target_map.update(zip(map(name, include), include)) + + for exclude in map(name, excludes): + target_map.pop(exclude, None) + + targets = list(target_map.values()) + + if args.print: + for target in targets: + print(target) + return + + pip_args = [sys.executable, '-m', 'pip', 'install', '-U'] + if args.user: + pip_args.append('--user') + pip_args.extend(targets) + + return subprocess.call(pip_args) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py new file mode 100644 index 0000000..6f52165 --- /dev/null +++ b/devscripts/lazy_load_template.py @@ -0,0 +1,39 @@ +import importlib +import random +import re + +from ..utils import ( + age_restricted, + bug_reports_message, + classproperty, + variadic, + write_string, +) + +# These bloat the lazy_extractors, so allow them to passthrough silently +ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'} +_WARNED = False + + +class LazyLoadMetaClass(type): + def __getattr__(cls, name): + global _WARNED + if ('_real_class' not in cls.__dict__ + and name not in ALLOWED_CLASSMETHODS and not _WARNED): + _WARNED = True + write_string('WARNING: Falling back to normal extractor since lazy extractor ' + f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n') + return getattr(cls.real_class, name) + + +class LazyLoadExtractor(metaclass=LazyLoadMetaClass): + @classproperty + def real_class(cls): + if '_real_class' not in cls.__dict__: + cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__) + return cls._real_class + + def __new__(cls, *args, **kwargs): + instance = cls.real_class.__new__(cls.real_class) + instance.__init__(*args, **kwargs) + return instance diff --git a/devscripts/logo.ico b/devscripts/logo.ico new file mode 100644 index 0000000..5503a43 Binary files /dev/null and b/devscripts/logo.ico differ diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py new file mode 100644 index 0000000..faab5fa --- /dev/null +++ b/devscripts/make_changelog.py @@ -0,0 +1,503 @@ +from __future__ import annotations + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import enum +import itertools +import json +import logging +import re +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +from devscripts.utils import read_file, run_process, write_file + +BASE_URL = 'https://github.com' +LOCATION_PATH = Path(__file__).parent +HASH_LENGTH = 7 + +logger = logging.getLogger(__name__) + + +class CommitGroup(enum.Enum): + PRIORITY = 'Important' + CORE = 'Core' + EXTRACTOR = 'Extractor' + DOWNLOADER = 'Downloader' + POSTPROCESSOR = 'Postprocessor' + NETWORKING = 'Networking' + MISC = 'Misc.' 
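+
+    # The lookup helpers below map commit-message prefixes such as "ie/youtube"
+    # or "rh:urllib" onto these groups; prefixes that match neither lookup fall
+    # back to CORE (or EXTRACTOR, via a heuristic) in `CommitRange.groups()`.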
+ + @classmethod + @lru_cache + def subgroup_lookup(cls): + return { + name: group + for group, names in { + cls.MISC: { + 'build', + 'ci', + 'cleanup', + 'devscripts', + 'docs', + 'test', + }, + cls.NETWORKING: { + 'rh', + }, + }.items() + for name in names + } + + @classmethod + @lru_cache + def group_lookup(cls): + result = { + 'fd': cls.DOWNLOADER, + 'ie': cls.EXTRACTOR, + 'pp': cls.POSTPROCESSOR, + 'upstream': cls.CORE, + } + result.update({item.name.lower(): item for item in iter(cls)}) + return result + + @classmethod + def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: + group, _, subgroup = (group.strip().lower() for group in value.partition('/')) + + result = cls.group_lookup().get(group) + if not result: + if subgroup: + return None, value + subgroup = group + result = cls.subgroup_lookup().get(subgroup) + + return result, subgroup or None + + +@dataclass +class Commit: + hash: str | None + short: str + authors: list[str] + + def __str__(self): + result = f'{self.short!r}' + + if self.hash: + result += f' ({self.hash[:HASH_LENGTH]})' + + if self.authors: + authors = ', '.join(self.authors) + result += f' by {authors}' + + return result + + +@dataclass +class CommitInfo: + details: str | None + sub_details: tuple[str, ...] + message: str + issues: list[str] + commit: Commit + fixes: list[Commit] + + def key(self): + return ((self.details or '').lower(), self.sub_details, self.message) + + +def unique(items): + return sorted({item.strip().lower(): item for item in items if item}.values()) + + +class Changelog: + MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE) + ALWAYS_SHOWN = (CommitGroup.PRIORITY,) + + def __init__(self, groups, repo, collapsible=False): + self._groups = groups + self._repo = repo + self._collapsible = collapsible + + def __str__(self): + return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ') + + def _format_groups(self, groups): + first = True + for item in CommitGroup: + if self._collapsible and item not in self.ALWAYS_SHOWN and first: + first = False + yield '\n<details><summary><h3>Changelog</h3></summary>\n' + + group = groups[item] + if group: + yield self.format_module(item.value, group) + + if self._collapsible: + yield '\n</details>' + + def format_module(self, name, group): + result = f'\n#### {name} changes\n' if name else '\n' + return result + '\n'.join(self._format_group(group)) + + def _format_group(self, group): + sorted_group = sorted(group, key=CommitInfo.key) + detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower()) + for _, items in detail_groups: + items = list(items) + details = items[0].details + + if details == 'cleanup': + items = self._prepare_cleanup_misc_items(items) + + prefix = '-' + if details: + if len(items) == 1: + prefix = f'- **{details}**:' + else: + yield f'- **{details}**' + prefix = '\t-' + + sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details))) + for sub_details, entries in sub_detail_groups: + if not sub_details: + for entry in entries: + yield f'{prefix} {self.format_single_change(entry)}' + continue + + entries = list(entries) + sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}' + if len(entries) == 1: + yield f'{sub_prefix}: {self.format_single_change(entries[0])}' + continue + + yield sub_prefix + for entry in entries: + yield f'\t{prefix} {self.format_single_change(entry)}' + + def _prepare_cleanup_misc_items(self, items): + cleanup_misc_items = 
defaultdict(list) + sorted_items = [] + for item in items: + if self.MISC_RE.search(item.message): + cleanup_misc_items[tuple(item.commit.authors)].append(item) + else: + sorted_items.append(item) + + for commit_infos in cleanup_misc_items.values(): + sorted_items.append(CommitInfo( + 'cleanup', ('Miscellaneous',), ', '.join( + self._format_message_link(None, info.commit.hash) + for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')), + [], Commit(None, '', commit_infos[0].commit.authors), [])) + + return sorted_items + + def format_single_change(self, info: CommitInfo): + message, sep, rest = info.message.partition('\n') + if '[' not in message: + # If the message doesn't already contain markdown links, try to add a link to the commit + message = self._format_message_link(message, info.commit.hash) + + if info.issues: + message = f'{message} ({self._format_issues(info.issues)})' + + if info.commit.authors: + message = f'{message} by {self._format_authors(info.commit.authors)}' + + if info.fixes: + fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) + + authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold) + if authors != info.commit.authors: + fix_message = f'{fix_message} by {self._format_authors(authors)}' + + message = f'{message} (With fixes in {fix_message})' + + return message if not sep else f'{message}{sep}{rest}' + + def _format_message_link(self, message, hash): + assert message or hash, 'Improperly defined commit message or override' + message = message if message else hash[:HASH_LENGTH] + return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + + def _format_issues(self, issues): + return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) + + @staticmethod + def _format_authors(authors): + return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors) + + @property + def repo_url(self): + return f'{BASE_URL}/{self._repo}' + + +class CommitRange: + COMMAND = 'git' + COMMIT_SEPARATOR = '-----' + + AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) + MESSAGE_RE = re.compile(r''' + (?:\[(?P<prefix>[^\]]+)\]\ )? + (?:(?P<sub_details>`?[\w.-]+`?): )? + (?P<message>.+?) + (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))? 
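+        # the optional group above captures trailing issue references like (#123, #456)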
+ ''', re.VERBOSE | re.DOTALL) + EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) + REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') + FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})') + UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') + + def __init__(self, start, end, default_author=None): + self._start, self._end = start, end + self._commits, self._fixes = self._get_commits_and_fixes(default_author) + self._commits_added = [] + + def __iter__(self): + return iter(itertools.chain(self._commits.values(), self._commits_added)) + + def __len__(self): + return len(self._commits) + len(self._commits_added) + + def __contains__(self, commit): + if isinstance(commit, Commit): + if not commit.hash: + return False + commit = commit.hash + + return commit in self._commits + + def _get_commits_and_fixes(self, default_author): + result = run_process( + self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', + f'{self._start}..{self._end}' if self._start else self._end).stdout + + commits, reverts = {}, {} + fixes = defaultdict(list) + lines = iter(result.splitlines(False)) + for i, commit_hash in enumerate(lines): + short = next(lines) + skip = short.startswith('Release ') or short == '[version] update' + + authors = [default_author] if default_author else [] + for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): + match = self.AUTHOR_INDICATOR_RE.match(line) + if match: + authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + + commit = Commit(commit_hash, short, authors) + if skip and (self._start or not i): + logger.debug(f'Skipped commit: {commit}') + continue + elif skip: + logger.debug(f'Reached Release commit, breaking: {commit}') + break + + revert_match = self.REVERT_RE.fullmatch(commit.short) + if revert_match: + reverts[revert_match.group(1)] = commit + continue + + fix_match = self.FIXES_RE.search(commit.short) + if fix_match: + commitish = fix_match.group(1) + fixes[commitish].append(commit) + + commits[commit.hash] = commit + + for commitish, revert_commit in reverts.items(): + reverted = commits.pop(commitish, None) + if reverted: + logger.debug(f'{commitish} fully reverted {reverted}') + else: + commits[revert_commit.hash] = revert_commit + + for commitish, fix_commits in fixes.items(): + if commitish in commits: + hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits) + logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}') + for fix_commit in fix_commits: + del commits[fix_commit.hash] + else: + logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}') + + return commits, fixes + + def apply_overrides(self, overrides): + for override in overrides: + when = override.get('when') + if when and when not in self and when != self._start: + logger.debug(f'Ignored {when!r} override') + continue + + override_hash = override.get('hash') or when + if override['action'] == 'add': + commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) + logger.info(f'ADD {commit}') + self._commits_added.append(commit) + + elif override['action'] == 'remove': + if override_hash in self._commits: + logger.info(f'REMOVE {self._commits[override_hash]}') + del self._commits[override_hash] + + elif override['action'] == 'change': + if override_hash not in self._commits: + continue + commit = Commit(override_hash, override['short'], override.get('authors') 
or []) + logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') + self._commits[commit.hash] = commit + + self._commits = {key: value for key, value in reversed(self._commits.items())} + + def groups(self): + group_dict = defaultdict(list) + for commit in self: + upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short) + if upstream_re: + commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}' + + match = self.MESSAGE_RE.fullmatch(commit.short) + if not match: + logger.error(f'Error parsing short commit message: {commit.short!r}') + continue + + prefix, sub_details_alt, message, issues = match.groups() + issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] + + if prefix: + groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(','))) + group = next(iter(filter(None, groups)), None) + details = ', '.join(unique(details)) + sub_details = list(itertools.chain.from_iterable(sub_details)) + else: + group = CommitGroup.CORE + details = None + sub_details = [] + + if sub_details_alt: + sub_details.append(sub_details_alt) + sub_details = tuple(unique(sub_details)) + + if not group: + if self.EXTRACTOR_INDICATOR_RE.search(commit.short): + group = CommitGroup.EXTRACTOR + logger.error(f'Assuming [ie] group for {commit.short!r}') + else: + group = CommitGroup.CORE + + commit_info = CommitInfo( + details, sub_details, message.strip(), + issues, commit, self._fixes[commit.hash]) + + logger.debug(f'Resolved {commit.short!r} to {commit_info!r}') + group_dict[group].append(commit_info) + + return group_dict + + @staticmethod + def details_from_prefix(prefix): + if not prefix: + return CommitGroup.CORE, None, () + + prefix, *sub_details = prefix.split(':') + + group, details = CommitGroup.get(prefix) + if group is CommitGroup.PRIORITY and details: + details = details.partition('/')[2].strip() + + if details and '/' in details: + logger.error(f'Prefix is overnested, using first part: {prefix}') + details = details.partition('/')[0].strip() + + if details == 'common': + details = None + elif group is CommitGroup.NETWORKING and details == 'rh': + details = 'Request Handler' + + return group, details, sub_details + + +def get_new_contributors(contributors_path, commits): + contributors = set() + if contributors_path.exists(): + for line in read_file(contributors_path).splitlines(): + author, _, _ = line.strip().partition(' (') + authors = author.split('/') + contributors.update(map(str.casefold, authors)) + + new_contributors = set() + for commit in commits: + for author in commit.authors: + author_folded = author.casefold() + if author_folded not in contributors: + contributors.add(author_folded) + new_contributors.add(author) + + return sorted(new_contributors, key=str.casefold) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='Create a changelog markdown from a git commit range') + parser.add_argument( + 'commitish', default='HEAD', nargs='?', + help='The commitish to create the range from (default: %(default)s)') + parser.add_argument( + '-v', '--verbosity', action='count', default=0, + help='increase verbosity (can be used twice)') + parser.add_argument( + '-c', '--contributors', action='store_true', + help='update CONTRIBUTORS file (default: %(default)s)') + parser.add_argument( + '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS', + help='path to the CONTRIBUTORS file') + parser.add_argument( + '--no-override', action='store_true', + help='skip 
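How `details_from_prefix` carves up a commit prefix deserves one concrete example. Below is a sketch with a toy stand-in for `CommitGroup.get`, which is defined earlier in the file and not shown here, so the real mapping may differ:

```python
# Sketch of the prefix handling in details_from_prefix(), reduced to strings.
def toy_commit_group_get(prefix):
    # Assumption: 'ie' maps to the Extractor group with the part after '/'
    # as details; anything else falls back to Core here.
    key, _, details = prefix.partition('/')
    return ('Extractor', details or None) if key == 'ie' else ('Core', None)

def toy_details_from_prefix(prefix):
    prefix, *sub_details = prefix.split(':')
    group, details = toy_commit_group_get(prefix)
    if details and '/' in details:
        details = details.partition('/')[0].strip()  # overnested prefix
    if details == 'common':
        details = None
    return group, details, sub_details

print(toy_details_from_prefix('ie/youtube:tab'))  # ('Extractor', 'youtube', ['tab'])
print(toy_details_from_prefix('ie/common'))       # ('Extractor', None, [])
print(toy_details_from_prefix('cleanup:misc'))    # ('Core', None, ['misc'])
```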
override json in commit generation (default: %(default)s)') + parser.add_argument( + '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json', + help='path to the changelog_override.json file') + parser.add_argument( + '--default-author', default='pukkandan', + help='the author to use without an author indicator (default: %(default)s)') + parser.add_argument( + '--repo', default='yt-dlp/yt-dlp', + help='the github repository to use for the operations (default: %(default)s)') + parser.add_argument( + '--collapsible', action='store_true', + help='make changelog collapsible (default: %(default)s)') + args = parser.parse_args() + + logging.basicConfig( + datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}', + level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr) + + commits = CommitRange(None, args.commitish, args.default_author) + + if not args.no_override: + if args.override_path.exists(): + overrides = json.loads(read_file(args.override_path)) + commits.apply_overrides(overrides) + else: + logger.warning(f'File {args.override_path.as_posix()} does not exist') + + logger.info(f'Loaded {len(commits)} commits') + + new_contributors = get_new_contributors(args.contributors_path, commits) + if new_contributors: + if args.contributors: + write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') + logger.info(f'New contributors: {", ".join(new_contributors)}') + + print(Changelog(commits.groups(), args.repo, args.collapsible)) diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py new file mode 100755 index 0000000..a06f8a6 --- /dev/null +++ b/devscripts/make_contributing.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import optparse +import re + + +def main(): + return # This is unused in yt-dlp + + parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected an input and an output filename') + + infile, outfile = args + + with open(infile, encoding='utf-8') as inf: + readme = inf.read() + + bug_text = re.search( + r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) + dev_text = re.search( + r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1) + + out = bug_text + dev_text + + with open(outfile, 'w', encoding='utf-8') as outf: + outf.write(out) + + +if __name__ == '__main__': + main() diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py new file mode 100644 index 0000000..a5d59f3 --- /dev/null +++ b/devscripts/make_issue_template.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import re + +from devscripts.utils import get_filename_args, read_file, write_file + +VERBOSE_TMPL = ''' + - type: checkboxes + id: verbose + attributes: + label: Provide verbose output that clearly demonstrates the problem + options: + - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`) + required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false + - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below + required: true + - type: textarea + id: log + attributes: + label: Complete Verbose Output + description: | + It should start like this: + placeholder: | + [debug] Command-line config: ['-vU',
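The override file consumed via `--override-path` is a JSON list of actions matching what `apply_overrides` above expects: `add`, `remove`, and `change` entries keyed by `hash`, with optional `when`, `short`, and `authors` fields. An illustrative set of entries, with hypothetical values (the real file is `devscripts/changelog_override.json`):

```python
# Illustrative override entries as consumed by apply_overrides() above.
import json

overrides = [
    {'action': 'add', 'short': '[priority] Important announcement', 'authors': ['someone']},
    {'action': 'remove', 'hash': '0' * 40},
    {'action': 'change', 'hash': '1' * 40,
     'short': '[ie/foo] Fix extractor', 'authors': ['someone']},
]
print(json.dumps(overrides, indent=4))
```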
'https://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 + [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) + [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 + [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 + [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] Proxy map: {} + [debug] Request Handlers: urllib, requests + [debug] Loaded 1893 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) + [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc + <more lines> + render: shell + validations: + required: true +'''.strip() + +NO_SKIP = ''' + - type: checkboxes + attributes: + label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE + description: Fill all fields even if you think it is irrelevant for the issue + options: + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\\* field + required: true +'''.strip() + + +def main(): + fields = {'no_skip': NO_SKIP} + fields['verbose'] = VERBOSE_TMPL % fields + fields['verbose_optional'] = re.sub(r'(\n\s+validations:)?\n\s+required: true', '', fields['verbose']) + + infile, outfile = get_filename_args(has_infile=True) + write_file(outfile, read_file(infile) % fields) + + +if __name__ == '__main__': + main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py new file mode 100644 index 0000000..d74ea20 --- /dev/null +++ b/devscripts/make_lazy_extractors.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import shutil +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from inspect import getsource + +from devscripts.utils import get_filename_args, read_file, write_file + +NO_ATTR = object() +STATIC_CLASS_PROPERTIES = [ + 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching + '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions + 'age_limit', # Used for --age-limit (evaluated) + '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated) +] +CLASS_METHODS = [ + 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching + 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation + 'description', # Used for --extractor-descriptions + 'is_suitable', # Used for --age-limit + 'supports_login', 'is_single_video', # Accessed in CLI only with instance +] +IE_TEMPLATE = ''' +class {name}({bases}): + _module = {module!r} +''' +MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py') + + +def main(): + lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py') + if os.path.exists(lazy_extractors_filename): + os.remove(lazy_extractors_filename) + + _ALL_CLASSES = get_all_ies() # Must be before import + + import yt_dlp.plugins + from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor + + # Filter out plugins + _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] + + DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) + module_src = 
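For orientation, this is roughly what one entry in the generated `yt_dlp/extractor/lazy_extractors.py` looks like; the class name and module below are hypothetical, not copied from a real build:

```python
# What IE_TEMPLATE expands to for a single (hypothetical) extractor
IE_TEMPLATE = '''
class {name}({bases}):
    _module = {module!r}
'''

print(IE_TEMPLATE.format(
    name='FooIE', bases='LazyLoadExtractor', module='yt_dlp.extractor.foo'))
# class FooIE(LazyLoadExtractor):
#     _module = 'yt_dlp.extractor.foo'
# ...followed by the static properties and methods emitted by extra_ie_code()
```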
'\n'.join(( + MODULE_TEMPLATE, + ' _module = None', + *extra_ie_code(DummyInfoExtractor), + '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n', + *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor), + )) + + write_file(lazy_extractors_filename, f'{module_src}\n') + + +def get_all_ies(): + PLUGINS_DIRNAME = 'ytdlp_plugins' + BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked' + if os.path.exists(PLUGINS_DIRNAME): + # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958 + shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME) + try: + from yt_dlp.extractor.extractors import _ALL_CLASSES + finally: + if os.path.exists(BLOCKED_DIRNAME): + shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME) + return _ALL_CLASSES + + +def extra_ie_code(ie, base=None): + for var in STATIC_CLASS_PROPERTIES: + val = getattr(ie, var) + if val != (getattr(base, var) if base else NO_ATTR): + yield f' {var} = {val!r}' + yield '' + + for name in CLASS_METHODS: + f = getattr(ie, name) + if not base or f.__func__ != getattr(base, name).__func__: + yield getsource(f) + + +def build_ies(ies, bases, attr_base): + names = [] + for ie in sort_ies(ies, bases): + yield build_lazy_ie(ie, ie.__name__, attr_base) + if ie in ies: + names.append(ie.__name__) + + yield f'\n_ALL_CLASSES = [{", ".join(names)}]' + + +def sort_ies(ies, ignored_bases): + """find the correct sorting and add the required base classes so that subclasses can be correctly created""" + classes, returned_classes = ies[:-1], set() + assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE' + while classes: + for c in classes[:]: + bases = set(c.__bases__) - {object, *ignored_bases} + restart = False + for b in sorted(bases, key=lambda x: x.__name__): + if b not in classes and b not in returned_classes: + assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE' + classes.insert(0, b) + restart = True + if restart: + break + if bases <= returned_classes: + yield c + returned_classes.add(c) + classes.remove(c) + break + yield ies[-1] + + +def build_lazy_ie(ie, name, attr_base): + bases = ', '.join({ + 'InfoExtractor': 'LazyLoadExtractor', + 'SearchInfoExtractor': 'LazyLoadSearchExtractor', + }.get(base.__name__, base.__name__) for base in ie.__bases__) + + s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases) + return s + '\n'.join(extra_ie_code(ie, attr_base)) + + +if __name__ == '__main__': + main() diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py new file mode 100755 index 0000000..2270b31 --- /dev/null +++ b/devscripts/make_readme.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +""" +yt-dlp --help | make_readme.py +This must be run in a console of correct width +""" + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import functools +import re + +from devscripts.utils import read_file, write_file + +README_FILE = 'README.md' + +OPTIONS_START = 'General Options:' +OPTIONS_END = 'CONFIGURATION' +EPILOG_START = 'See full documentation' +ALLOWED_OVERSHOOT = 2 + +DISABLE_PATCH = object() + + +def take_section(text, start=None, end=None, *, shift=0): + return text[ + text.index(start) + shift if start else None: + text.index(end) + shift if end else None + ] + + +def apply_patch(text, patch): + return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text) + + +options = take_section(sys.stdin.read(), f'\n {OPTIONS_START}', f'\n{EPILOG_START}', shift=1) + +max_width = 
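The ordering contract of `sort_ies` (every base class is yielded before its subclasses, and `GenericIE` comes last) can be shown with a simplified recursive sketch; the script itself uses an iterative loop, but for this toy hierarchy the resulting order is the same:

```python
# Simplified recursive sketch of the ordering sort_ies() guarantees.
class InfoExtractor: pass
class BaseIE(InfoExtractor): pass
class ChildIE(BaseIE): pass
class GenericIE(InfoExtractor): pass

def toy_sort(ies, ignored_bases):
    seen, out = set(ignored_bases) | {object}, []
    def visit(cls):
        if cls in seen:
            return
        for base in cls.__bases__:  # emit bases first
            visit(base)
        seen.add(cls)
        out.append(cls)
    for cls in ies[:-1]:
        visit(cls)
    out.append(ies[-1])  # GenericIE must always be last
    return out

order = toy_sort([ChildIE, GenericIE], (InfoExtractor,))
print([c.__name__ for c in order])  # ['BaseIE', 'ChildIE', 'GenericIE']
```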
max(map(len, options.split('\n'))) +switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group()) +delim = f'\n{" " * switch_col_width}' + +PATCHES = ( + ( # Standardize `--update` message + r'(?m)^( -U, --update\s+).+(\n \s.+)*$', + r'\1Update this program to the latest version', + ), + ( # Headings + r'(?m)^ (\w.+\n)( (?=\w))?', + r'## \1' + ), + ( # Fixup `--date` formatting + rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', + (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}' + f'E.g. "--date today-2weeks" downloads only{delim}' + 'videos uploaded on the same day two weeks ago'), + ), + ( # Do not split URLs + rf'({delim[:-1]})? (?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s', + lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n')) + ), + ( # Do not split "words" + rf'(?m)({delim}\S+)+$', + lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))) + ), + ( # Allow overshooting last line + rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})', + lambda mobj: (mobj.group().replace(delim, ' ') + if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT + else mobj.group()) + ), + ( # Avoid newline when a space is available b/w switch and description + DISABLE_PATCH, # This creates issues with prepare_manpage + r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim), + r'\1 ' + ), + ( # Replace brackets with a Markdown link + r'SponsorBlock API \((http.+)\)', + r'[SponsorBlock API](\1)' + ), +) + +readme = read_file(README_FILE) + +write_file(README_FILE, ''.join(( + take_section(readme, end=f'## {OPTIONS_START}'), + functools.reduce(apply_patch, PATCHES, options), + take_section(readme, f'# {OPTIONS_END}'), +))) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py new file mode 100644 index 0000000..01548ef --- /dev/null +++ b/devscripts/make_supportedsites.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from devscripts.utils import get_filename_args, write_file +from yt_dlp.extractor import list_extractor_classes + + +def main(): + out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False) + write_file(get_filename_args(), f'# Supported sites\n{out}\n') + + +if __name__ == '__main__': + main() diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py new file mode 100644 index 0000000..9b12e71 --- /dev/null +++ b/devscripts/prepare_manpage.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import os.path +import re + +from devscripts.utils import ( + compose_functions, + get_filename_args, + read_file, + write_file, +) + +ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +README_FILE = os.path.join(ROOT_DIR, 'README.md') + +PREFIX = r'''%yt-dlp(1) + +# NAME + +yt\-dlp \- A youtube-dl fork with additional features and patches + +# SYNOPSIS + +**yt-dlp** \[OPTIONS\] URL [URL...] 
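The patch pipeline of `make_readme.py` in isolation: each patch is a `(regex, replacement)` pair folded over the options text with `functools.reduce`, and the `DISABLE_PATCH` sentinel keeps a patch documented but inert. A minimal sketch, not part of the patch:

```python
# Minimal reproduction of the apply_patch/reduce pipeline
import functools
import re

DISABLE_PATCH = object()

def apply_patch(text, patch):
    return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)

PATCHES = (
    (r'colour', 'color'),
    (DISABLE_PATCH, r'\s+', ' '),  # kept for reference, never applied
)

print(functools.reduce(apply_patch, PATCHES, 'colourful\noutput'))
# -> 'colorful\noutput' (the disabled patch is skipped)
```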
+ +# DESCRIPTION + +''' + + +def filter_excluded_sections(readme): + EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->') + EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->') + return re.sub( + rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n', + '', readme) + + +def move_sections(readme): + MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->' + sections = re.findall(r'(?m)^%s$' % ( + re.escape(MOVE_TAG_TEMPLATE).replace(r'\%', '%') % '(.+)'), readme) + + for section_name in sections: + move_tag = MOVE_TAG_TEMPLATE % section_name + if readme.count(move_tag) > 1: + raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected') + + sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme) + if len(sections) < 1: + raise Exception(f'The section {section_name} does not exist') + elif len(sections) > 1: + raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled') + + readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1) + return readme + + +def filter_options(readme): + section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0) + options = '# OPTIONS\n' + for line in section.split('\n')[1:]: + mobj = re.fullmatch(r'''(?x) + \s{4}(?P<opt>-(?:,\s|[^\s])+) + (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))? + (\s{2,}(?P<desc>.+))? + ''', line) + if not mobj: + options += f'{line.lstrip()}\n' + continue + option, metavar, description = mobj.group('opt', 'meta', 'desc') + + # Pandoc's definition_lists. See http://pandoc.org/README.html + option = f'{option} *{metavar}*' if metavar else option + description = f'{description}\n' if description else '' + options += f'\n{option}\n: {description}' + continue + + return readme.replace(section, options, 1) + + +TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options) + + +def main(): + write_file(get_filename_args(), PREFIX + TRANSFORM(read_file(README_FILE))) + + +if __name__ == '__main__': + main() diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat new file mode 100644 index 0000000..57b1f4b --- /dev/null +++ b/devscripts/run_tests.bat @@ -0,0 +1,4 @@ +@echo off + +>&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead +python %~dp0run_tests.py %~1 diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py new file mode 100755 index 0000000..6d638a9 --- /dev/null +++ b/devscripts/run_tests.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +import argparse +import functools +import os +import re +import subprocess +import sys +from pathlib import Path + + +fix_test_name = functools.partial(re.compile(r'IE(_all|_\d+)?$').sub, r'\1') + + +def parse_args(): + parser = argparse.ArgumentParser(description='Run selected yt-dlp tests') + parser.add_argument( + 'test', help='an extractor test, or one of "core" or "download"', nargs='*') + parser.add_argument( + '-k', help='run a test matching EXPRESSION.
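One concrete example of the option rewrite performed by `filter_options`, applied to a single hypothetical README line (the real input is the options block generated from `yt-dlp --help`):

```python
# A single option line converted into a Pandoc definition-list entry
import re

line = '    -f, --format FORMAT          Video format code'
mobj = re.fullmatch(r'''(?x)
    \s{4}(?P<opt>-(?:,\s|[^\s])+)
    (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
    (\s{2,}(?P<desc>.+))?
''', line)
option, metavar, description = mobj.group('opt', 'meta', 'desc')
option = f'{option} *{metavar}*' if metavar else option
print(f'{option}\n: {description}')
# -f, --format *FORMAT*
# : Video format code
```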
Same as "pytest -k"', metavar='EXPRESSION') + return parser.parse_args() + + +def run_tests(*tests, pattern=None, ci=False): + run_core = 'core' in tests or (not pattern and not tests) + run_download = 'download' in tests + tests = list(map(fix_test_name, tests)) + + arguments = ['pytest', '-Werror', '--tb=short'] + if ci: + arguments.append('--color=yes') + if run_core: + arguments.extend(['-m', 'not download']) + elif run_download: + arguments.extend(['-m', 'download']) + elif pattern: + arguments.extend(['-k', pattern]) + else: + arguments.extend( + f'test/test_download.py::TestDownload::test_{test}' for test in tests) + + print(f'Running {arguments}', flush=True) + try: + return subprocess.call(arguments) + except FileNotFoundError: + pass + + arguments = [sys.executable, '-Werror', '-m', 'unittest'] + if run_core: + print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True) + return 1 + elif run_download: + arguments.append('test.test_download') + elif pattern: + arguments.extend(['-k', pattern]) + else: + arguments.extend( + f'test.test_download.TestDownload.test_{test}' for test in tests) + + print(f'Running {arguments}', flush=True) + return subprocess.call(arguments) + + +if __name__ == '__main__': + try: + args = parse_args() + + os.chdir(Path(__file__).parent.parent) + sys.exit(run_tests(*args.test, pattern=args.k, ci=bool(os.getenv('CI')))) + except KeyboardInterrupt: + pass diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100755 index 0000000..123ceb1 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env sh + +>&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead' +python3 devscripts/run_tests.py "$1" diff --git a/devscripts/set-variant.py b/devscripts/set-variant.py new file mode 100644 index 0000000..10341e7 --- /dev/null +++ b/devscripts/set-variant.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import argparse +import functools +import re + +from devscripts.utils import compose_functions, read_file, write_file + +VERSION_FILE = 'yt_dlp/version.py' + + +def parse_options(): + parser = argparse.ArgumentParser(description='Set the build variant of the package') + parser.add_argument('variant', help='Name of the variant') + parser.add_argument('-M', '--update-message', default=None, help='Message to show in -U') + return parser.parse_args() + + +def property_setter(name, value): + return functools.partial(re.sub, rf'(?m)^{name}\s*=\s*.+$', f'{name} = {value!r}') + + +opts = parse_options() +transform = compose_functions( + property_setter('VARIANT', opts.variant), + property_setter('UPDATE_HINT', opts.update_message) +) + +write_file(VERSION_FILE, transform(read_file(VERSION_FILE))) diff --git a/devscripts/tomlparse.py b/devscripts/tomlparse.py new file mode 100755 index 0000000..85ac4ee --- /dev/null +++ b/devscripts/tomlparse.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +""" +Simple parser for spec compliant toml files + +A simple toml parser for files that comply with the spec. +Should only be used to parse `pyproject.toml` for `install_deps.py`. + +IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED! 
+""" + +from __future__ import annotations + +import datetime +import json +import re + +WS = r'(?:[\ \t]*)' +STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'') +SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+') +KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*') +EQUALS_RE = re.compile(rf'={WS}') +WS_RE = re.compile(WS) + +_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)' +EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE) + +LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*') +LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+') + + +def parse_key(value: str): + for match in SINGLE_KEY_RE.finditer(value): + if match[0][0] == '"': + yield json.loads(match[0]) + elif match[0][0] == '\'': + yield match[0][1:-1] + else: + yield match[0] + + +def get_target(root: dict, paths: list[str], is_list=False): + target = root + + for index, key in enumerate(paths, 1): + use_list = is_list and index == len(paths) + result = target.get(key) + if result is None: + result = [] if use_list else {} + target[key] = result + + if isinstance(result, dict): + target = result + elif use_list: + target = {} + result.append(target) + else: + target = result[-1] + + assert isinstance(target, dict) + return target + + +def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern): + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + while data[index] != end: + index = yield True, index + + if match := ws_re.match(data, index): + index = match.end() + + if data[index] == ',': + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + assert data[index] == end + yield False, index + 1 + + +def parse_value(data: str, index: int): + if data[index] == '[': + result = [] + + indices = parse_enclosed(data, index, ']', LIST_WS_RE) + valid, index = next(indices) + while valid: + index, value = parse_value(data, index) + result.append(value) + valid, index = indices.send(index) + + return index, result + + if data[index] == '{': + result = {} + + indices = parse_enclosed(data, index, '}', WS_RE) + valid, index = next(indices) + while valid: + valid, index = indices.send(parse_kv_pair(data, index, result)) + + return index, result + + if match := STRING_RE.match(data, index): + return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1] + + match = LEFTOVER_VALUE_RE.match(data, index) + assert match + value = match[0].strip() + for func in [ + int, + float, + datetime.time.fromisoformat, + datetime.date.fromisoformat, + datetime.datetime.fromisoformat, + {'true': True, 'false': False}.get, + ]: + try: + value = func(value) + break + except Exception: + pass + + return match.end(), value + + +def parse_kv_pair(data: str, index: int, target: dict): + match = KEY_RE.match(data, index) + if not match: + return None + + *keys, key = parse_key(match[0]) + + match = EQUALS_RE.match(data, match.end()) + assert match + index = match.end() + + index, value = parse_value(data, index) + get_target(target, keys)[key] = value + return index + + +def parse_toml(data: str): + root = {} + target = root + + index = 0 + while True: + match = EXPRESSION_RE.search(data, index) + if not match: + break + + if match.group('subtable'): + index = match.end() + path, is_list = match.group('path', 'is_list') + target = get_target(root, list(parse_key(path)), bool(is_list)) + continue + + index = parse_kv_pair(data, match.start(), target) + 
assert index is not None + + return root + + +def main(): + import argparse + from pathlib import Path + + parser = argparse.ArgumentParser() + parser.add_argument('infile', type=Path, help='The TOML file to read as input') + args = parser.parse_args() + + with args.infile.open('r', encoding='utf-8') as file: + data = file.read() + + def default(obj): + if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)): + return obj.isoformat() + + print(json.dumps(parse_toml(data), default=default)) + + +if __name__ == '__main__': + main() diff --git a/devscripts/update-version.py b/devscripts/update-version.py new file mode 100644 index 0000000..da54a6a --- /dev/null +++ b/devscripts/update-version.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import argparse +import contextlib +import sys +from datetime import datetime, timezone + +from devscripts.utils import read_version, run_process, write_file + + +def get_new_version(version, revision): + if not version: + version = datetime.now(timezone.utc).strftime('%Y.%m.%d') + + if revision: + assert revision.isdecimal(), 'Revision must be a number' + else: + old_version = read_version().split('.') + if version.split('.') == old_version[:3]: + revision = str(int((old_version + [0])[3]) + 1) + + return f'{version}.{revision}' if revision else version + + +def get_git_head(): + with contextlib.suppress(Exception): + return run_process('git', 'rev-parse', 'HEAD').stdout.strip() + + +VERSION_TEMPLATE = '''\ +# Autogenerated by devscripts/update-version.py + +__version__ = {version!r} + +RELEASE_GIT_HEAD = {git_head!r} + +VARIANT = None + +UPDATE_HINT = None + +CHANNEL = {channel!r} + +ORIGIN = {origin!r} + +_pkg_version = {package_version!r} +''' + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Update the version.py file') + parser.add_argument( + '-c', '--channel', default='stable', + help='Select update channel (default: %(default)s)') + parser.add_argument( + '-r', '--origin', default='local', + help='Select origin/repository (default: %(default)s)') + parser.add_argument( + '-s', '--suffix', default='', + help='Add an alphanumeric suffix to the package version, e.g. "dev"') + parser.add_argument( + '-o', '--output', default='yt_dlp/version.py', + help='The output file to write to (default: %(default)s)') + parser.add_argument( + 'version', nargs='?', default=None, + help='A version or revision to use instead of generating one') + args = parser.parse_args() + + git_head = get_git_head() + version = ( + args.version if args.version and '.' 
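The revision logic of `get_new_version` in `update-version.py`, sketched with the current version passed in as a parameter instead of read from `yt_dlp/version.py`: releasing again under the same date-based version appends and bumps a fourth `.N` component.

```python
# Sketch of get_new_version()'s revision handling
def toy_get_new_version(version, revision, old_version):
    if revision:
        assert revision.isdecimal(), 'Revision must be a number'
    else:
        old = old_version.split('.')
        if version.split('.') == old[:3]:
            revision = str(int((old + [0])[3]) + 1)
    return f'{version}.{revision}' if revision else version

print(toy_get_new_version('2024.03.10', None, '2024.03.09'))    # '2024.03.10'
print(toy_get_new_version('2024.03.10', None, '2024.03.10'))    # '2024.03.10.1'
print(toy_get_new_version('2024.03.10', None, '2024.03.10.1'))  # '2024.03.10.2'
```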
in args.version + else get_new_version(None, args.version)) + write_file(args.output, VERSION_TEMPLATE.format( + version=version, git_head=git_head, channel=args.channel, origin=args.origin, + package_version=f'{version}{args.suffix}')) + + print(f'version={version} ({args.channel}), head={git_head}') diff --git a/devscripts/utils.py b/devscripts/utils.py new file mode 100644 index 0000000..a952c9f --- /dev/null +++ b/devscripts/utils.py @@ -0,0 +1,47 @@ +import argparse +import functools +import subprocess + + +def read_file(fname): + with open(fname, encoding='utf-8') as f: + return f.read() + + +def write_file(fname, content, mode='w'): + with open(fname, mode, encoding='utf-8') as f: + return f.write(content) + + +def read_version(fname='yt_dlp/version.py', varname='__version__'): + """Get the version without importing the package""" + items = {} + exec(compile(read_file(fname), fname, 'exec'), items) + return items[varname] + + +def get_filename_args(has_infile=False, default_outfile=None): + parser = argparse.ArgumentParser() + if has_infile: + parser.add_argument('infile', help='Input file') + kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {} + parser.add_argument('outfile', **kwargs, help='Output file') + + opts = parser.parse_args() + if has_infile: + return opts.infile, opts.outfile + return opts.outfile + + +def compose_functions(*functions): + return lambda x: functools.reduce(lambda y, f: f(y), functions, x) + + +def run_process(*args, **kwargs): + kwargs.setdefault('text', True) + kwargs.setdefault('check', True) + kwargs.setdefault('capture_output', True) + if kwargs['text']: + kwargs.setdefault('encoding', 'utf-8') + kwargs.setdefault('errors', 'replace') + return subprocess.run(args, **kwargs) diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in new file mode 100644 index 0000000..9117d33 --- /dev/null +++ b/devscripts/zsh-completion.in @@ -0,0 +1,30 @@ +#compdef yt-dlp + +__yt_dlp() { + local curcontext="$curcontext" fileopts diropts cur prev + typeset -A opt_args + fileopts="{{fileopts}}" + diropts="{{diropts}}" + cur=$words[CURRENT] + case $cur in + :) + _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)' + ;; + *) + prev=$words[CURRENT-1] + if [[ ${prev} =~ ${fileopts} ]]; then + _path_files + elif [[ ${prev} =~ ${diropts} ]]; then + _path_files -/ + elif [[ ${prev} == "--remux-video" ]]; then + _arguments '*: :(mp4 mkv)' + elif [[ ${prev} == "--recode-video" ]]; then + _arguments '*: :(mp4 flv ogg webm mkv)' + else + _arguments '*: :({{flags}})' + fi + ;; + esac +} + +__yt_dlp \ No newline at end of file diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py new file mode 100755 index 0000000..267af5f --- /dev/null +++ b/devscripts/zsh-completion.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import yt_dlp + +ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp" +ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" + + +def build_completion(opt_parser): + opts = [opt for group in opt_parser.option_groups + for opt in group.option_list] + opts_file = [opt for opt in opts if opt.metavar == "FILE"] + opts_dir = [opt for opt in opts if opt.metavar == "DIR"] + + fileopts = [] + for opt in opts_file: + if opt._short_opts: + fileopts.extend(opt._short_opts) + if opt._long_opts: + fileopts.extend(opt._long_opts) + + diropts = [] + for 
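A short usage note for `compose_functions` from `devscripts/utils.py`, which is how `prepare_manpage.py` and `set-variant.py` build their text transforms: the functions are applied left to right over a single value.

```python
# compose_functions chains single-argument functions left to right
import functools

def compose_functions(*functions):
    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)

transform = compose_functions(str.strip, str.upper, lambda s: s + '!')
print(transform('  yt-dlp  '))  # -> 'YT-DLP!'
```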
opt in opts_dir: + if opt._short_opts: + diropts.extend(opt._short_opts) + if opt._long_opts: + diropts.extend(opt._long_opts) + + flags = [opt.get_opt_string() for opt in opts] + + with open(ZSH_COMPLETION_TEMPLATE) as f: + template = f.read() + + template = template.replace("{{fileopts}}", "|".join(fileopts)) + template = template.replace("{{diropts}}", "|".join(diropts)) + template = template.replace("{{flags}}", " ".join(flags)) + + with open(ZSH_COMPLETION_FILE, "w") as f: + f.write(template) + + +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] +build_completion(parser) diff --git a/public.key b/public.key new file mode 100644 index 0000000..b3af31e --- /dev/null +++ b/public.key @@ -0,0 +1,29 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBGP78C4BEAD0rF9zjGPAt0thlt5C1ebzccAVX7Nb1v+eqQjk+WEZdTETVCg3 +WAM5ngArlHdm/fZqzUgO+pAYrB60GKeg7ffUDf+S0XFKEZdeRLYeAaqqKhSibVal +DjvOBOztu3W607HLETQAqA7wTPuIt2WqmpL60NIcyr27LxqmgdN3mNvZ2iLO+bP0 +nKR/C+PgE9H4ytywDa12zMx6PmZCnVOOOu6XZEFmdUxxdQ9fFDqd9LcBKY2LDOcS +Yo1saY0YWiZWHtzVoZu1kOzjnS5Fjq/yBHJLImDH7pNxHm7s/PnaurpmQFtDFruk +t+2lhDnpKUmGr/I/3IHqH/X+9nPoS4uiqQ5HpblB8BK+4WfpaiEg75LnvuOPfZIP +KYyXa/0A7QojMwgOrD88ozT+VCkKkkJ+ijXZ7gHNjmcBaUdKK7fDIEOYI63Lyc6Q +WkGQTigFffSUXWHDCO9aXNhP3ejqFWgGMtCUsrbkcJkWuWY7q5ARy/05HbSM3K4D +U9eqtnxmiV1WQ8nXuI9JgJQRvh5PTkny5LtxqzcmqvWO9TjHBbrs14BPEO9fcXxK +L/CFBbzXDSvvAgArdqqlMoncQ/yicTlfL6qzJ8EKFiqW14QMTdAn6SuuZTodXCTi +InwoT7WjjuFPKKdvfH1GP4bnqdzTnzLxCSDIEtfyfPsIX+9GI7Jkk/zZjQARAQAB +tDdTaW1vbiBTYXdpY2tpICh5dC1kbHAgc2lnbmluZyBrZXkpIDxjb250YWN0QGdy +dWI0ay54eXo+iQJOBBMBCgA4FiEErAy75oSNaoc0ZK9OV89lkztadYEFAmP78C4C +GwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQV89lkztadYEVqQ//cW7TxhXg +7Xbh2EZQzXml0egn6j8QaV9KzGragMiShrlvTO2zXfLXqyizrFP4AspgjSn/4NrI +8mluom+Yi+qr7DXT4BjQqIM9y3AjwZPdywe912Lxcw52NNoPZCm24I9T7ySc8lmR +FQvZC0w4H/VTNj/2lgJ1dwMflpwvNRiWa5YzcFGlCUeDIPskLx9++AJE+xwU3LYm +jQQsPBqpHHiTBEJzMLl+rfd9Fg4N+QNzpFkTDW3EPerLuvJniSBBwZthqxeAtw4M +UiAXh6JvCc2hJkKCoygRfM281MeolvmsGNyQm+axlB0vyldiPP6BnaRgZlx+l6MU +cPqgHblb7RW5j9lfr6OYL7SceBIHNv0CFrt1OnkGo/tVMwcs8LH3Ae4a7UJlIceL +V54aRxSsZU7w4iX+PB79BWkEsQzwKrUuJVOeL4UDwWajp75OFaUqbS/slDDVXvK5 +OIeuth3mA/adjdvgjPxhRQjA3l69rRWIJDrqBSHldmRsnX6cvXTDy8wSXZgy51lP +m4IVLHnCy9m4SaGGoAsfTZS0cC9FgjUIyTyrq9M67wOMpUxnuB0aRZgJE1DsI23E +qdvcSNVlO+39xM/KPWUEh6b83wMn88QeW+DCVGWACQq5N3YdPnAJa50617fGbY6I +gXIoRHXkDqe23PZ/jURYCv0sjVtjPoVC+bg= +=bJkn +-----END PGP PUBLIC KEY BLOCK----- diff --git a/pyinst.py b/pyinst.py new file mode 100755 index 0000000..4a8ed2d --- /dev/null +++ b/pyinst.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 + +# Allow execution from anywhere +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +import warnings + +from bundle.pyinstaller import main + +warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. 
' + 'Use `bundle.pyinstaller` instead')) + +if __name__ == '__main__': + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..64504ff --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,120 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "yt-dlp" +maintainers = [ + {name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"}, + {name = "Grub4K", email = "contact@grub4k.xyz"}, + {name = "bashonly", email = "bashonly@protonmail.com"}, + {name = "coletdjnz", email = "coletdjnz@protonmail.com"}, +] +description = "A youtube-dl fork with additional features and patches" +readme = "README.md" +requires-python = ">=3.8" +keywords = [ + "youtube-dl", + "video-downloader", + "youtube-downloader", + "sponsorblock", + "youtube-dlc", + "yt-dlp", +] +license = {file = "LICENSE"} +classifiers = [ + "Topic :: Multimedia :: Video", + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "License :: OSI Approved :: The Unlicense (Unlicense)", + "Operating System :: OS Independent", +] +dynamic = ["version"] +dependencies = [ + "brotli; implementation_name=='cpython'", + "brotlicffi; implementation_name!='cpython'", + "certifi", + "mutagen", + "pycryptodomex", + "requests>=2.31.0,<3", + "urllib3>=1.26.17,<3", + "websockets>=12.0", +] + +[project.optional-dependencies] +default = [] +secretstorage = [ + "cffi", + "secretstorage", +] +build = [ + "build", + "hatchling", + "pip", + "wheel", +] +dev = [ + "flake8", + "isort", + "pytest", +] +pyinstaller = ["pyinstaller>=6.3"] +py2exe = ["py2exe>=0.12"] + +[project.urls] +Documentation = "https://github.com/yt-dlp/yt-dlp#readme" +Repository = "https://github.com/yt-dlp/yt-dlp" +Tracker = "https://github.com/yt-dlp/yt-dlp/issues" +Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators" + +[project.scripts] +yt-dlp = "yt_dlp:main" + +[project.entry-points.pyinstaller40] +hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs" + +[tool.hatch.build.targets.sdist] +include = [ + "/yt_dlp", + "/devscripts", + "/test", + "/.gitignore", # included by default, needed for auto-excludes + "/Changelog.md", + "/LICENSE", # included as license + "/pyproject.toml", # included by default + "/README.md", # included as readme + "/setup.cfg", + "/supportedsites.md", +] +artifacts = [ + "/yt_dlp/extractor/lazy_extractors.py", + "/completions", + "/AUTHORS", # included by default + "/README.txt", + "/yt-dlp.1", +] + +[tool.hatch.build.targets.wheel] +packages = ["yt_dlp"] +artifacts = ["/yt_dlp/extractor/lazy_extractors.py"] + +[tool.hatch.build.targets.wheel.shared-data] +"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp" +"completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp" +"completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish" +"README.txt" = "share/doc/yt_dlp/README.txt" +"yt-dlp.1" = "share/man/man1/yt-dlp.1" + +[tool.hatch.version] +path = "yt_dlp/version.py" +pattern = "_pkg_version = '(?P<version>[^']+)'" diff --git 
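The optional dependency groups defined in the `pyproject.toml` above can be inspected with the stdlib `tomllib` (Python 3.11+; `devscripts/tomlparse.py` exists presumably because the project still supports 3.8). An illustrative snippet, not part of the patch:

```python
# List the extras defined in pyproject.toml (requires Python 3.11+ for tomllib)
import tomllib

with open('pyproject.toml', 'rb') as f:
    project = tomllib.load(f)['project']

print(list(project['optional-dependencies']))
# ['default', 'secretstorage', 'build', 'dev', 'pyinstaller', 'py2exe']
```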
a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..aeb4cee --- /dev/null +++ b/setup.cfg @@ -0,0 +1,45 @@ +[flake8] +exclude = build,venv,.tox,.git,.pytest_cache +ignore = E402,E501,E731,E741,W503 +max_line_length = 120 +per_file_ignores = + devscripts/lazy_load_template.py: F401 + + +[autoflake] +ignore-init-module-imports = true +ignore-pass-after-docstring = true +remove-all-unused-imports = true +remove-duplicate-keys = true +remove-unused-variables = true + + +[tool:pytest] +addopts = -ra -v --strict-markers +markers = + download + + +[tox:tox] +skipsdist = true +envlist = py{38,39,310,311,312},pypy{38,39,310} +skip_missing_interpreters = true + +[testenv] # tox +deps = + pytest +commands = pytest {posargs:"-m not download"} +passenv = HOME # For test_compat_expanduser +setenv = + # PYTHONWARNINGS = error # Catches PIP's warnings too + + +[isort] +py_version = 38 +multi_line_output = VERTICAL_HANGING_INDENT +line_length = 80 +reverse_relative = true +ensure_newline_before_comments = true +include_trailing_comma = true +known_first_party = + test diff --git a/setup.py b/setup.py new file mode 100755 index 0000000..8d1e6d1 --- /dev/null +++ b/setup.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# Allow execution from anywhere +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +import warnings + + +if sys.argv[1:2] == ['py2exe']: + warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. ' + 'Use `bundle.py2exe` instead')) + + import bundle.py2exe + + bundle.py2exe.main() + +elif 'build_lazy_extractors' in sys.argv: + warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. ' + 'Use `devscripts.make_lazy_extractors` instead')) + + import subprocess + + os.chdir(sys.path[0]) + print('running build_lazy_extractors') + subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py']) + +else: + + print( + 'ERROR: Building by calling `setup.py` is deprecated. ' + 'Use a build frontend like `build` instead. 
', + 'Refer to https://build.pypa.io for more info', file=sys.stderr) + sys.exit(1) diff --git a/supportedsites.md b/supportedsites.md new file mode 100644 index 0000000..a4b2d57 --- /dev/null +++ b/supportedsites.md @@ -0,0 +1,1794 @@ +# Supported sites + - **17live** + - **17live:clip** + - **1News**: 1news.co.nz article videos + - **1tv**: Первый канал + - **20min** + - **23video** + - **247sports**: (**Currently broken**) + - **24tv.ua** + - **3qsdn**: 3Q SDN + - **3sat** + - **4tube** + - **56.com** + - **6play** + - **7plus** + - **8tracks** + - **91porn** + - **9c9media** + - **9gag**: 9GAG + - **9News** + - **9now.com.au** + - **abc.net.au** + - **abc.net.au:iview** + - **abc.net.au:​iview:showseries** + - **abcnews** + - **abcnews:video** + - **abcotvs**: ABC Owned Television Stations + - **abcotvs:clips** + - **AbemaTV**: [*abematv*](## "netrc machine") + - **AbemaTVTitle**: [*abematv*](## "netrc machine") + - **AcademicEarth:Course** + - **acast** + - **acast:channel** + - **AcFunBangumi** + - **AcFunVideo** + - **ADN**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network + - **ADNSeason**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network + - **AdobeConnect** + - **adobetv** + - **adobetv:channel** + - **adobetv:embed** + - **adobetv:show** + - **adobetv:video** + - **AdultSwim** + - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault + - **aenetworks:collection** + - **aenetworks:show** + - **AeonCo** + - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com + - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com + - **afreecatv:user** + - **AirTV** + - **AitubeKZVideo** + - **AliExpressLive** + - **AlJazeera** + - **Allocine** + - **Allstar** + - **AllstarProfile** + - **AlphaPorno** + - **Alsace20TV** + - **Alsace20TVEmbed** + - **altcensored** + - **altcensored:channel** + - **Alura**: [*alura*](## "netrc machine") + - **AluraCourse**: [*aluracourse*](## "netrc machine") + - **AmadeusTV** + - **Amara** + - **AmazonMiniTV** + - **amazonminitv:season**: Amazon MiniTV Season, "minitv:season:" prefix + - **amazonminitv:series**: Amazon MiniTV Series, "minitv:series:" prefix + - **AmazonReviews** + - **AmazonStore** + - **AMCNetworks** + - **AmericasTestKitchen** + - **AmericasTestKitchenSeason** + - **AmHistoryChannel** + - **AnchorFMEpisode** + - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **Angel** + - **AnimalPlanet** + - **ant1newsgr:article**: ant1news.gr articles + - **ant1newsgr:embed**: ant1news.gr embedded videos + - **antenna:watch**: antenna.gr and ant1news.gr videos + - **Anvato** + - **aol.com**: Yahoo screen and movies (**Currently broken**) + - **APA** + - **Aparat** + - **AppleConnect** + - **AppleDaily**: 臺灣蘋果日報 + - **ApplePodcasts** + - **appletrailers** + - **appletrailers:section** + - **archive.org**: archive.org video and audio + - **ArcPublishing** + - **ARD** + - **ARDMediathek** + - **ARDMediathekCollection** + - **Arkena** + - **Art19** + - **Art19Show** + - **arte.sky.it** + - **ArteTV** + - **ArteTVCategory** + - **ArteTVEmbed** + - **ArteTVPlaylist** + - **asobichannel**: ASOBI CHANNEL + - **asobichannel:tag**: ASOBI CHANNEL + - **AtresPlayer**: [*atresplayer*](## "netrc machine") + - **AtScaleConfEvent** + - **ATVAt** + - **AudiMedia** + - **AudioBoom** + - **Audiodraft:custom** + - **Audiodraft:generic** + - **audiomack** + - **audiomack:album** + - **Audius**: Audius.co + - **audius:artist**: Audius.co 
profile/artist pages + - **audius:playlist**: Audius.co playlists + - **audius:track**: Audius track ID or API link. Prepend with "audius:" + - **AWAAN** + - **awaan:live** + - **awaan:season** + - **awaan:video** + - **axs.tv** + - **AZMedien**: AZ Medien videos + - **BaiduVideo**: 百度视频 + - **BanBye** + - **BanByeChannel** + - **bandaichannel** + - **Bandcamp** + - **Bandcamp:album** + - **Bandcamp:user** + - **Bandcamp:weekly** + - **BannedVideo** + - **bbc**: [*bbc*](## "netrc machine") BBC + - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer + - **bbc.co.uk:article**: BBC articles + - **bbc.co.uk:​iplayer:episodes** + - **bbc.co.uk:​iplayer:group** + - **bbc.co.uk:playlist** + - **BBVTV**: [*bbvtv*](## "netrc machine") + - **BBVTVLive**: [*bbvtv*](## "netrc machine") + - **BBVTVRecordings**: [*bbvtv*](## "netrc machine") + - **BeatBumpPlaylist** + - **BeatBumpVideo** + - **Beatport** + - **Beeg** + - **BehindKink**: (**Currently broken**) + - **Bellator** + - **BellMedia** + - **BerufeTV** + - **Bet**: (**Currently broken**) + - **bfi:player**: (**Currently broken**) + - **bfmtv** + - **bfmtv:article** + - **bfmtv:live** + - **bibeltv:live**: BibelTV live program + - **bibeltv:series**: BibelTV series playlist + - **bibeltv:video**: BibelTV single video + - **Bigflix** + - **Bigo** + - **Bild**: Bild.de + - **BiliBili** + - **Bilibili category extractor** + - **BilibiliAudio** + - **BilibiliAudioAlbum** + - **BiliBiliBangumi** + - **BiliBiliBangumiMedia** + - **BiliBiliBangumiSeason** + - **BilibiliCheese** + - **BilibiliCheeseSeason** + - **BilibiliCollectionList** + - **BilibiliFavoritesList** + - **BiliBiliPlayer** + - **BilibiliPlaylist** + - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix + - **BilibiliSeriesList** + - **BilibiliSpaceAudio** + - **BilibiliSpaceVideo** + - **BilibiliWatchlater** + - **BiliIntl**: [*biliintl*](## "netrc machine") + - **biliIntl:series**: [*biliintl*](## "netrc machine") + - **BiliLive** + - **BioBioChileTV** + - **Biography** + - **BitChute** + - **BitChuteChannel** + - **BlackboardCollaborate** + - **BleacherReport**: (**Currently broken**) + - **BleacherReportCMS**: (**Currently broken**) + - **blerp** + - **blogger.com** + - **Bloomberg** + - **BokeCC** + - **BongaCams** + - **Boosty** + - **BostonGlobe** + - **Box** + - **BoxCastVideo** + - **Bpb**: Bundeszentrale für politische Bildung + - **BR**: Bayerischer Rundfunk (**Currently broken**) + - **BrainPOP**: [*brainpop*](## "netrc machine") + - **BrainPOPELL**: [*brainpop*](## "netrc machine") + - **BrainPOPEsp**: [*brainpop*](## "netrc machine") BrainPOP Español + - **BrainPOPFr**: [*brainpop*](## "netrc machine") BrainPOP Français + - **BrainPOPIl**: [*brainpop*](## "netrc machine") BrainPOP Hebrew + - **BrainPOPJr**: [*brainpop*](## "netrc machine") + - **BravoTV** + - **BreitBart** + - **brightcove:legacy** + - **brightcove:new** + - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org + - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org + - **bt:article**: Bergens Tidende Articles + - **bt:vestlendingen**: Bergens Tidende - Vestlendingen + - **Bundesliga** + - **Bundestag** + - **BusinessInsider** + - **BuzzFeed** + - **BYUtv**: (**Currently broken**) + - **CableAV** + - **Callin** + - **Caltrans** + - **CAM4** + - **Camdemy** + - **CamdemyFolder** + - **CamFMEpisode** + - **CamFMShow** + - **CamModels** + - **Camsoda** + - **CamtasiaEmbed** + - **Canal1** + - **CanalAlpha** + 
- **canalc2.tv** + - **Canalplus**: mycanal.fr and piwiplus.fr + - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine") + - **CartoonNetwork** + - **cbc.ca** + - **cbc.ca:player** + - **cbc.ca:​player:playlist** + - **CBS**: (**Currently broken**) + - **CBSLocal** + - **CBSLocalArticle** + - **CBSLocalLive** + - **cbsnews**: CBS News + - **cbsnews:embed** + - **cbsnews:live**: CBS News Livestream + - **cbsnews:livevideo**: CBS News Live Videos + - **cbssports**: (**Currently broken**) + - **cbssports:embed**: (**Currently broken**) + - **CCMA** + - **CCTV**: 央视网 + - **CDA**: [*cdapl*](## "netrc machine") + - **Cellebrite** + - **CeskaTelevize** + - **CGTN** + - **CharlieRose** + - **Chaturbate** + - **Chilloutzone** + - **chzzk:live** + - **chzzk:video** + - **cielotv.it** + - **Cinemax**: (**Currently broken**) + - **CinetecaMilano** + - **Cineverse** + - **CineverseDetails** + - **CiscoLiveSearch** + - **CiscoLiveSession** + - **ciscowebex**: Cisco Webex + - **CJSW** + - **Clipchamp** + - **Clippit** + - **ClipRs**: (**Currently broken**) + - **ClipYouEmbed** + - **CloserToTruth**: (**Currently broken**) + - **CloudflareStream** + - **CloudyCDN** + - **Clubic**: (**Currently broken**) + - **Clyp** + - **cmt.com**: (**Currently broken**) + - **CNBCVideo** + - **CNN** + - **CNNArticle** + - **CNNBlogs** + - **CNNIndonesia** + - **ComedyCentral** + - **ComedyCentralTV** + - **ConanClassic** + - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED + - **CONtv** + - **CookingChannel** + - **Corus** + - **Coub** + - **CozyTV** + - **cp24** + - **cpac** + - **cpac:playlist** + - **Cracked** + - **Crackle** + - **Craftsy** + - **CrooksAndLiars** + - **CrowdBunker** + - **CrowdBunkerChannel** + - **Crtvg** + - **crunchyroll**: [*crunchyroll*](## "netrc machine") + - **crunchyroll:artist**: [*crunchyroll*](## "netrc machine") + - **crunchyroll:music**: [*crunchyroll*](## "netrc machine") + - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine") + - **CSpan**: C-SPAN + - **CSpanCongress** + - **CtsNews**: 華視新聞 + - **CTV** + - **CTVNews** + - **cu.ntv.co.jp**: Nippon Television Network + - **CultureUnplugged** + - **curiositystream**: [*curiositystream*](## "netrc machine") + - **curiositystream:collections**: [*curiositystream*](## "netrc machine") + - **curiositystream:series**: [*curiositystream*](## "netrc machine") + - **CWTV** + - **Cybrary**: [*cybrary*](## "netrc machine") + - **CybraryCourse**: [*cybrary*](## "netrc machine") + - **DacastPlaylist** + - **DacastVOD** + - **DagelijkseKost**: dagelijksekost.een.be + - **DailyMail** + - **dailymotion**: [*dailymotion*](## "netrc machine") + - **dailymotion:playlist**: [*dailymotion*](## "netrc machine") + - **dailymotion:search**: [*dailymotion*](## "netrc machine") + - **dailymotion:user**: [*dailymotion*](## "netrc machine") + - **DailyWire** + - **DailyWirePodcast** + - **damtomo:record** + - **damtomo:video** + - **daum.net** + - **daum.net:clip** + - **daum.net:playlist** + - **daum.net:user** + - **daystar:clip** + - **DBTV** + - **DctpTv** + - **DeezerAlbum** + - **DeezerPlaylist** + - **democracynow** + - **DestinationAmerica** + - **DetikEmbed** + - **DeuxM** + - **DeuxMNews** + - **DHM**: Filmarchiv - Deutsches Historisches Museum (**Currently broken**) + - **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") 
DigitalConcertHall extractor + - **DigitallySpeaking** + - **Digiteka** + - **DiscogsReleasePlaylist** + - **Discovery** + - **DiscoveryLife** + - **DiscoveryNetworksDe** + - **DiscoveryPlus** + - **DiscoveryPlusIndia** + - **DiscoveryPlusIndiaShow** + - **DiscoveryPlusItaly** + - **DiscoveryPlusItalyShow** + - **Disney** + - **DIYNetwork** + - **dlf** + - **dlf:corpus**: DLF Multi-feed Archives + - **dlive:stream** + - **dlive:vod** + - **Douyin** + - **DouyuShow** + - **DouyuTV**: 斗鱼直播 + - **DPlay** + - **DRBonanza** + - **Drooble** + - **Dropbox** + - **Dropout**: [*dropout*](## "netrc machine") + - **DropoutSeason** + - **DrTuber** + - **drtv** + - **drtv:live** + - **drtv:season** + - **drtv:series** + - **DTube**: (**Currently broken**) + - **duboku**: www.duboku.io + - **duboku:list**: www.duboku.io entire series + - **Dumpert** + - **Duoplay** + - **dvtv**: http://video.aktualne.cz/ + - **dw**: (**Currently broken**) + - **dw:article**: (**Currently broken**) + - **EaglePlatform** + - **EbaumsWorld** + - **Ebay** + - **egghead:course**: egghead.io course + - **egghead:lesson**: egghead.io lesson + - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") + - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") + - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") + - **Einthusan** + - **eitb.tv** + - **ElementorEmbed** + - **Elonet** + - **ElPais**: El País + - **ElTreceTV**: El Trece TV (Argentina) + - **Embedly** + - **EMPFlix** + - **Epicon** + - **EpiconSeries** + - **EpidemicSound** + - **eplus**: [*eplus*](## "netrc machine") e+ (イープラス) + - **Epoch** + - **Eporner** + - **Erocast** + - **EroProfile**: [*eroprofile*](## "netrc machine") + - **EroProfile:album** + - **ERRJupiter** + - **ertflix**: ERTFLIX videos + - **ertflix:codename**: ERTFLIX videos by codename + - **ertwebtv:embed**: ert.gr webtv embedded videos + - **ESPN** + - **ESPNArticle** + - **ESPNCricInfo** + - **EttuTv** + - **Europa**: (**Currently broken**) + - **EuroParlWebstream** + - **EuropeanTour** + - **Eurosport** + - **EUScreen** + - **EWETV**: [*ewetv*](## "netrc machine") + - **EWETVLive**: [*ewetv*](## "netrc machine") + - **EWETVRecordings**: [*ewetv*](## "netrc machine") + - **Expressen** + - **EyedoTV** + - **facebook**: [*facebook*](## "netrc machine") + - **facebook:ads** + - **facebook:reel** + - **FacebookPluginsVideo** + - **fancode:live**: [*fancode*](## "netrc machine") (**Currently broken**) + - **fancode:vod**: [*fancode*](## "netrc machine") (**Currently broken**) + - **faz.net** + - **fc2**: [*fc2*](## "netrc machine") + - **fc2:embed** + - **fc2:live** + - **Fczenit** + - **Fifa** + - **filmon** + - **filmon:channel** + - **Filmweb** + - **FiveThirtyEight** + - **FiveTV** + - **FlexTV** + - **Flickr** + - **Floatplane** + - **FloatplaneChannel** + - **Folketinget**: Folketinget (ft.dk; Danish parliament) + - **FoodNetwork** + - **FootyRoom** + - **Formula1** + - **FOX** + - **FOX9** + - **FOX9News** + - **foxnews**: Fox News and Fox Business Video + - **foxnews:article** + - **FoxNewsVideo** + - **FoxSports** + - **fptplay**: fptplay.vn + - **FranceCulture** + - **FranceInter** + - **FranceTV** + - **francetvinfo.fr** + - **FranceTVSite** + - **Freesound** + - **freespeech.org** + - **freetv:series** + - **FreeTvMovies** + - **FrontendMasters**: [*frontendmasters*](## "netrc machine") + - **FrontendMastersCourse**: [*frontendmasters*](## "netrc machine") + - **FrontendMastersLesson**: [*frontendmasters*](## "netrc machine") + - **FujiTVFODPlus7** + - **Funimation**: 
[*funimation*](## "netrc machine") + - **funimation:page**: [*funimation*](## "netrc machine") + - **funimation:show**: [*funimation*](## "netrc machine") + - **Funk** + - **Funker530** + - **Fux** + - **FuyinTV** + - **Gab** + - **GabTV** + - **Gaia**: [*gaia*](## "netrc machine") + - **GameJolt** + - **GameJoltCommunity** + - **GameJoltGame** + - **GameJoltGameSoundtrack** + - **GameJoltSearch** + - **GameJoltUser** + - **GameSpot** + - **GameStar** + - **Gaskrank** + - **Gazeta**: (**Currently broken**) + - **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**) + - **GediDigital** + - **gem.cbc.ca**: [*cbcgem*](## "netrc machine") + - **gem.cbc.ca:live** + - **gem.cbc.ca:playlist** + - **Genius** + - **GeniusLyrics** + - **GetCourseRu**: [*getcourseru*](## "netrc machine") + - **GetCourseRuPlayer** + - **Gettr** + - **GettrStreaming** + - **GiantBomb** + - **GlattvisionTV**: [*glattvisiontv*](## "netrc machine") + - **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine") + - **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine") + - **Glide**: Glide mobile video messages (glide.me) + - **GlobalCyclingNetworkPlus** + - **GlobalPlayerAudio** + - **GlobalPlayerAudioEpisode** + - **GlobalPlayerLive** + - **GlobalPlayerLivePlaylist** + - **GlobalPlayerVideo** + - **Globo**: [*globo*](## "netrc machine") + - **GloboArticle** + - **glomex**: Glomex videos + - **glomex:embed**: Glomex embedded videos + - **GMANetworkVideo** + - **Go** + - **GoDiscovery** + - **GodTube**: (**Currently broken**) + - **Gofile** + - **Golem** + - **goodgame:stream** + - **google:podcasts** + - **google:​podcasts:feed** + - **GoogleDrive** + - **GoogleDrive:Folder** + - **GoPlay**: [*goplay*](## "netrc machine") + - **GoPro** + - **Goshgay** + - **GoToStage** + - **GPUTechConf** + - **Gronkh** + - **gronkh:feed** + - **gronkh:vods** + - **Groupon** + - **Harpodeon** + - **hbo** + - **HearThisAt** + - **Heise** + - **HellPorno** + - **hetklokhuis** + - **hgtv.com:show** + - **HGTVDe** + - **HGTVUsa** + - **HiDive**: [*hidive*](## "netrc machine") + - **HistoricFilms** + - **history:player** + - **history:topic**: History.com Topic + - **HitRecord** + - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau + - **HollywoodReporter** + - **HollywoodReporterPlaylist** + - **Holodex** + - **HotNewHipHop**: (**Currently broken**) + - **hotstar** + - **hotstar:playlist** + - **hotstar:season** + - **hotstar:series** + - **hrfernsehen** + - **HRTi**: [*hrti*](## "netrc machine") + - **HRTiPlaylist**: [*hrti*](## "netrc machine") + - **HSEProduct** + - **HSEShow** + - **html5** + - **Huajiao**: 花椒直播 + - **HuffPost**: Huffington Post + - **Hungama** + - **HungamaAlbumPlaylist** + - **HungamaSong** + - **huya:live**: huya.com + - **Hypem** + - **Hytale** + - **Icareus** + - **IdolPlus** + - **iflix:episode** + - **IflixSeries** + - **ign.com** + - **IGNArticle** + - **IGNVideo** + - **iheartradio** + - **iheartradio:podcast** + - **IlPost** + - **Iltalehti** + - **imdb**: Internet Movie Database trailers + - **imdb:list**: Internet Movie Database lists + - **Imgur** + - **imgur:album** + - **imgur:gallery** + - **Ina** + - **Inc** + - **IndavideoEmbed** + - **InfoQ** + - **Instagram**: [*instagram*](## "netrc machine") + - **instagram:story**: [*instagram*](## "netrc machine") + - **instagram:tag**: [*instagram*](## "netrc machine") Instagram hashtag search URLs + - **instagram:user**: [*instagram*](## "netrc machine") Instagram user profile (**Currently broken**) + - 
**InstagramIOS**: IOS instagram:// URL + - **Internazionale** + - **InternetVideoArchive** + - **InvestigationDiscovery** + - **IPrima**: [*iprima*](## "netrc machine") + - **IPrimaCNN** + - **iq.com**: International version of iQiyi + - **iq.com:album** + - **iqiyi**: [*iqiyi*](## "netrc machine") 爱奇艺 + - **IslamChannel** + - **IslamChannelSeries** + - **IsraelNationalNews** + - **ITProTV** + - **ITProTVCourse** + - **ITV** + - **ITVBTCC** + - **ivi**: ivi.ru + - **ivi:compilation**: ivi.ru compilations + - **ivideon**: Ivideon TV + - **IVXPlayer** + - **iwara**: [*iwara*](## "netrc machine") + - **iwara:playlist**: [*iwara*](## "netrc machine") + - **iwara:user**: [*iwara*](## "netrc machine") + - **Ixigua** + - **Izlesene** + - **Jable** + - **JablePlaylist** + - **Jamendo** + - **JamendoAlbum** + - **JeuxVideo**: (**Currently broken**) + - **JioSaavnAlbum** + - **JioSaavnSong** + - **Joj** + - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR) + - **Jove** + - **JStream** + - **JTBC**: jtbc.co.kr + - **JTBC:program** + - **JWPlatform** + - **Kakao** + - **Kaltura** + - **KankaNews**: (**Currently broken**) + - **Karaoketv** + - **Katsomo**: (**Currently broken**) + - **KelbyOne**: (**Currently broken**) + - **Ketnet** + - **khanacademy** + - **khanacademy:unit** + - **Kick** + - **Kicker** + - **KickStarter** + - **KickVOD** + - **kinja:embed** + - **KinoPoisk** + - **Kommunetv** + - **KompasVideo** + - **Koo**: (**Currently broken**) + - **KrasView**: Красвью (**Currently broken**) + - **KTH** + - **Ku6** + - **KukuluLive** + - **kuwo:album**: 酷我音乐 - 专辑 (**Currently broken**) + - **kuwo:category**: 酷我音乐 - 分类 (**Currently broken**) + - **kuwo:chart**: 酷我音乐 - 排行榜 (**Currently broken**) + - **kuwo:mv**: 酷我音乐 - MV (**Currently broken**) + - **kuwo:singer**: 酷我音乐 - 歌手 (**Currently broken**) + - **kuwo:song**: 酷我音乐 (**Currently broken**) + - **la7.it** + - **la7.it:​pod:episode** + - **la7.it:podcast** + - **LastFM** + - **LastFMPlaylist** + - **LastFMUser** + - **LaXarxaMes**: [*laxarxames*](## "netrc machine") + - **lbry** + - **lbry:channel** + - **lbry:playlist** + - **LCI** + - **Lcp** + - **LcpPlay** + - **Le**: 乐视网 + - **Lecture2Go**: (**Currently broken**) + - **Lecturio**: [*lecturio*](## "netrc machine") + - **LecturioCourse**: [*lecturio*](## "netrc machine") + - **LecturioDeCourse**: [*lecturio*](## "netrc machine") + - **LeFigaroVideoEmbed** + - **LeFigaroVideoSection** + - **LEGO** + - **Lemonde** + - **Lenta**: (**Currently broken**) + - **LePlaylist** + - **LetvCloud**: 乐视云 + - **Libsyn** + - **life**: Life.ru + - **life:embed** + - **likee** + - **likee:user** + - **limelight** + - **limelight:channel** + - **limelight:channel_list** + - **LinkedIn**: [*linkedin*](## "netrc machine") + - **linkedin:learning**: [*linkedin*](## "netrc machine") + - **linkedin:​learning:course**: [*linkedin*](## "netrc machine") + - **Liputan6** + - **ListenNotes** + - **LiTV** + - **LiveJournal** + - **livestream** + - **livestream:original** + - **Livestreamfails** + - **Lnk** + - **LnkGo** + - **loc**: Library of Congress + - **LoveHomePorn** + - **LRTStream** + - **LRTVOD** + - **LSMLREmbed** + - **LSMLTVEmbed** + - **LSMReplay** + - **Lumni** + - **lynda**: [*lynda*](## "netrc machine") lynda.com videos + - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses + - **maariv.co.il** + - **MagellanTV** + - **MagentaMusik** + - **mailru**: Видео@Mail.Ru + - **mailru:music**: Музыка@Mail.Ru + - **mailru:​music:search**: Музыка@Mail.Ru + - 
**MainStreaming**: MainStreaming Player + - **mangomolo:live** + - **mangomolo:video** + - **MangoTV**: 芒果TV + - **ManotoTV**: Manoto TV (Episode) + - **ManotoTVLive**: Manoto TV (Live) + - **ManotoTVShow**: Manoto TV (Show) + - **ManyVids**: (**Currently broken**) + - **MaoriTV** + - **Markiza**: (**Currently broken**) + - **MarkizaPage**: (**Currently broken**) + - **massengeschmack.tv** + - **Masters** + - **MatchTV** + - **MBN**: mbn.co.kr (매일방송) + - **MDR**: MDR.DE and KiKA + - **MedalTV** + - **media.ccc.de** + - **media.ccc.de:lists** + - **Mediaite** + - **MediaKlikk** + - **Medialaan** + - **Mediaset** + - **MediasetShow** + - **Mediasite** + - **MediasiteCatalog** + - **MediasiteNamedCatalog** + - **MediaStream** + - **MediaWorksNZVOD** + - **Medici** + - **megaphone.fm**: megaphone.fm embedded players + - **megatvcom**: megatv.com videos + - **megatvcom:embed**: megatv.com embedded videos + - **Meipai**: 美拍 + - **MelonVOD** + - **Metacritic** + - **mewatch** + - **MicrosoftEmbed** + - **microsoftstream**: Microsoft Stream + - **mildom**: Record ongoing live by specific user in Mildom + - **mildom:clip**: Clip in Mildom + - **mildom:​user:vod**: Download all VODs from specific user in Mildom + - **mildom:vod**: VOD in Mildom + - **minds** + - **minds:channel** + - **minds:group** + - **Minoto** + - **mirrativ** + - **mirrativ:user** + - **MirrorCoUK** + - **MiTele**: mitele.es + - **mixch** + - **mixch:archive** + - **mixcloud** + - **mixcloud:playlist** + - **mixcloud:user** + - **MLB** + - **MLBArticle** + - **MLBTV**: [*mlb*](## "netrc machine") + - **MLBVideo** + - **MLSSoccer** + - **MNetTV**: [*mnettv*](## "netrc machine") + - **MNetTVLive**: [*mnettv*](## "netrc machine") + - **MNetTVRecordings**: [*mnettv*](## "netrc machine") + - **MochaVideo** + - **Mojvideo** + - **Monstercat** + - **MonsterSirenHypergryphMusic** + - **Motherless** + - **MotherlessGallery** + - **MotherlessGroup** + - **MotherlessUploader** + - **Motorsport**: motorsport.com (**Currently broken**) + - **MotorTrend** + - **MotorTrendOnDemand** + - **MovieFap** + - **Moviepilot** + - **MoviewPlay** + - **Moviezine** + - **MovingImage** + - **MSN**: (**Currently broken**) + - **mtg**: MTG services + - **mtv** + - **mtv.de**: (**Currently broken**) + - **mtv.it** + - **mtv.it:programma** + - **mtv:video** + - **mtvjapan** + - **mtvservices:embedded** + - **MTVUutisetArticle**: (**Currently broken**) + - **MuenchenTV**: münchen.tv (**Currently broken**) + - **MujRozhlas** + - **Murrtube**: (**Currently broken**) + - **MurrtubeUser**: Murrtube user profile (**Currently broken**) + - **MuseAI** + - **MuseScore** + - **MusicdexAlbum** + - **MusicdexArtist** + - **MusicdexPlaylist** + - **MusicdexSong** + - **mva**: Microsoft Virtual Academy videos + - **mva:course**: Microsoft Virtual Academy courses + - **Mx3** + - **Mx3Neo** + - **Mx3Volksmusik** + - **Mxplayer** + - **MxplayerShow** + - **MySpace** + - **MySpace:album** + - **MySpass** + - **MyVideoGe** + - **MyVidster** + - **Mzaalo** + - **n-tv.de** + - **N1Info:article** + - **N1InfoAsset** + - **Nate** + - **NateProgram** + - **natgeo:video** + - **NationalGeographicTV** + - **Naver** + - **Naver:live** + - **navernow** + - **nba** + - **nba:channel** + - **nba:embed** + - **nba:watch** + - **nba:​watch:collection** + - **nba:​watch:embed** + - **NBC** + - **NBCNews** + - **nbcolympics** + - **nbcolympics:stream** + - **NBCSports** + - **NBCSportsStream** + - **NBCSportsVPlayer** + - **NBCStations** + - **ndr**: NDR.de - Norddeutscher Rundfunk + - 
**ndr:embed** + - **ndr:​embed:base** + - **NDTV**: (**Currently broken**) + - **nebula:channel**: [*watchnebula*](## "netrc machine") + - **nebula:media**: [*watchnebula*](## "netrc machine") + - **nebula:subscriptions**: [*watchnebula*](## "netrc machine") + - **nebula:video**: [*watchnebula*](## "netrc machine") + - **NekoHacker** + - **NerdCubedFeed** + - **netease:album**: 网易云音乐 - 专辑 + - **netease:djradio**: 网易云音乐 - 电台 + - **netease:mv**: 网易云音乐 - MV + - **netease:playlist**: 网易云音乐 - 歌单 + - **netease:program**: 网易云音乐 - 电台节目 + - **netease:singer**: 网易云音乐 - 歌手 + - **netease:song**: 网易云音乐 + - **NetPlusTV**: [*netplus*](## "netrc machine") + - **NetPlusTVLive**: [*netplus*](## "netrc machine") + - **NetPlusTVRecordings**: [*netplus*](## "netrc machine") + - **Netverse** + - **NetversePlaylist** + - **NetverseSearch**: "netsearch:" prefix + - **Netzkino**: (**Currently broken**) + - **Newgrounds**: [*newgrounds*](## "netrc machine") + - **Newgrounds:playlist** + - **Newgrounds:user** + - **NewsPicks** + - **Newsy** + - **NextMedia**: 蘋果日報 + - **NextMediaActionNews**: 蘋果日報 - 動新聞 + - **NextTV**: 壹電視 (**Currently broken**) + - **Nexx** + - **NexxEmbed** + - **nfb**: nfb.ca and onf.ca films and episodes + - **nfb:series**: nfb.ca and onf.ca series + - **NFHSNetwork** + - **nfl.com** + - **nfl.com:article** + - **nfl.com:​plus:episode** + - **nfl.com:​plus:replay** + - **NhkForSchoolBangumi** + - **NhkForSchoolProgramList** + - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学) + - **NhkRadioNewsPage** + - **NhkRadiru**: NHK らじる (Radiru/Rajiru) + - **NhkRadiruLive** + - **NhkVod** + - **NhkVodProgram** + - **nhl.com** + - **nick.com** + - **nick.de** + - **nickelodeon:br** + - **nickelodeonru** + - **niconico**: [*niconico*](## "netrc machine") ニコニコ動画 + - **niconico:history**: NicoNico user history or likes. Requires cookies. + - **niconico:live**: ニコニコ生放送 + - **niconico:playlist** + - **niconico:series** + - **niconico:tag**: NicoNico video tag URLs + - **NiconicoChannelPlus**: ニコニコチャンネルプラス + - **NiconicoChannelPlus:​channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives + - **NiconicoChannelPlus:​channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. 
nicochannel.jp/channel/videos + - **NiconicoUser** + - **nicovideo:search**: Nico video search; "nicosearch:" prefix + - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix + - **nicovideo:search_url**: Nico video search URLs + - **NinaProtocol** + - **Nintendo** + - **Nitter** + - **njoy**: N-JOY + - **njoy:embed** + - **NobelPrize**: (**Currently broken**) + - **NoicePodcast** + - **NonkTube** + - **NoodleMagazine** + - **Noovo** + - **NOSNLArticle** + - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz + - **NovaEmbed** + - **NovaPlay** + - **nowness** + - **nowness:playlist** + - **nowness:series** + - **Noz**: (**Currently broken**) + - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **npo.nl:live** + - **npo.nl:radio** + - **npo.nl:​radio:fragment** + - **Npr** + - **NRK** + - **NRKPlaylist** + - **NRKRadioPodkast** + - **NRKSkole**: NRK Skole + - **NRKTV**: NRK TV and NRK Radio + - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte + - **NRKTVEpisode** + - **NRKTVEpisodes** + - **NRKTVSeason** + - **NRKTVSeries** + - **NRLTV**: (**Currently broken**) + - **ntv.ru** + - **NubilesPorn**: [*nubiles-porn*](## "netrc machine") + - **nuum:live** + - **nuum:media** + - **nuum:tab** + - **Nuvid** + - **NYTimes** + - **NYTimesArticle** + - **NYTimesCookingGuide** + - **NYTimesCookingRecipe** + - **nzherald** + - **NZOnScreen** + - **NZZ** + - **ocw.mit.edu** + - **Odnoklassniki** + - **OfTV** + - **OfTVPlaylist** + - **OktoberfestTV** + - **OlympicsReplay** + - **on24**: ON24 + - **OnDemandChinaEpisode** + - **OnDemandKorea** + - **OnDemandKoreaProgram** + - **OneFootball** + - **OnePlacePodcast** + - **onet.pl** + - **onet.tv** + - **onet.tv:channel** + - **OnetMVP** + - **OnionStudios** + - **Opencast** + - **OpencastPlaylist** + - **openrec** + - **openrec:capture** + - **openrec:movie** + - **OraTV** + - **orf:​fm4:story**: fm4.orf.at stories + - **orf:iptv**: iptv.ORF.at + - **orf:on** + - **orf:podcast** + - **orf:radio** + - **orf:tvthek**: ORF TVthek + - **OsnatelTV**: [*osnateltv*](## "netrc machine") + - **OsnatelTVLive**: [*osnateltv*](## "netrc machine") + - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine") + - **OutsideTV** + - **OwnCloud** + - **PacktPub**: [*packtpub*](## "netrc machine") + - **PacktPubCourse** + - **PalcoMP3:artist** + - **PalcoMP3:song** + - **PalcoMP3:video** + - **Panopto** + - **PanoptoList** + - **PanoptoPlaylist** + - **ParamountNetwork** + - **ParamountPlus** + - **ParamountPlusSeries** + - **ParamountPressExpress** + - **Parler**: Posts on parler.com + - **parliamentlive.tv**: UK parliament videos + - **Parlview**: (**Currently broken**) + - **Patreon** + - **PatreonCampaign** + - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB 
Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) + - **PBSKids** + - **PearVideo** + - **PeekVids** + - **peer.tv** + - **PeerTube** + - **PeerTube:Playlist** + - **peloton**: [*peloton*](## "netrc machine") + - **peloton:live**: Peloton Live + - **PerformGroup** + - **periscope**: Periscope + - **periscope:user**: Periscope user videos + - **PGATour** + - **PhilharmonieDeParis**: Philharmonie de Paris + - **phoenix.de** + - **Photobucket** + - **Piapro**: [*piapro*](## "netrc machine") + - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM + - **Picarto** + - **PicartoVod** + - **Piksel** + - **Pinkbike** + - **Pinterest** + - **PinterestCollection** + - **pixiv:sketch** + - **pixiv:​sketch:user** + - **Pladform** + - **PlanetMarathi** + - **Platzi**: [*platzi*](## "netrc machine") + - **PlatziCourse**: [*platzi*](## "netrc machine") + - **player.sky.it** + - **playeur** + - **PlayPlusTV**: [*playplustv*](## "netrc machine") + - **PlaySuisse**: [*playsuisse*](## "netrc machine") + - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz + - **PlayVids** + - **Playwire** + - **pluralsight**: [*pluralsight*](## "netrc machine") + - **pluralsight:course** + - **PlutoTV**: (**Currently 
broken**) + - **PodbayFM** + - **PodbayFMChannel** + - **Podchaser** + - **podomatic**: (**Currently broken**) + - **Pokemon** + - **PokemonWatch** + - **PokerGo**: [*pokergo*](## "netrc machine") + - **PokerGoCollection**: [*pokergo*](## "netrc machine") + - **PolsatGo** + - **PolskieRadio** + - **polskieradio:audition** + - **polskieradio:category** + - **polskieradio:legacy** + - **polskieradio:player** + - **polskieradio:podcast** + - **polskieradio:​podcast:list** + - **Popcorntimes** + - **PopcornTV** + - **Pornbox** + - **PornerBros** + - **PornFlip** + - **PornHub**: [*pornhub*](## "netrc machine") PornHub and Thumbzilla + - **PornHubPagedVideoList**: [*pornhub*](## "netrc machine") + - **PornHubPlaylist**: [*pornhub*](## "netrc machine") + - **PornHubUser**: [*pornhub*](## "netrc machine") + - **PornHubUserVideosUpload**: [*pornhub*](## "netrc machine") + - **Pornotube** + - **PornoVoisines**: (**Currently broken**) + - **PornoXO**: (**Currently broken**) + - **PornTop** + - **PornTube** + - **Pr0gramm** + - **PrankCast** + - **PrankCastPost** + - **PremiershipRugby** + - **PressTV** + - **ProjectVeritas**: (**Currently broken**) + - **prosiebensat1**: ProSiebenSat.1 Digital + - **PRXAccount** + - **PRXSeries** + - **prxseries:search**: PRX Series Search; "prxseries:" prefix + - **prxstories:search**: PRX Stories Search; "prxstories:" prefix + - **PRXStory** + - **puhutv** + - **puhutv:serie** + - **Puls4** + - **Pyvideo** + - **QDance**: [*qdance*](## "netrc machine") + - **QingTing** + - **qqmusic**: QQ音乐 + - **qqmusic:album**: QQ音乐 - 专辑 + - **qqmusic:playlist**: QQ音乐 - 歌单 + - **qqmusic:singer**: QQ音乐 - 歌手 + - **qqmusic:toplist**: QQ音乐 - 排行榜 + - **QuantumTV**: [*quantumtv*](## "netrc machine") + - **QuantumTVLive**: [*quantumtv*](## "netrc machine") + - **QuantumTVRecordings**: [*quantumtv*](## "netrc machine") + - **Qub** + - **R7**: (**Currently broken**) + - **R7Article**: (**Currently broken**) + - **Radiko** + - **RadikoRadio** + - **radio.de**: (**Currently broken**) + - **radiocanada** + - **radiocanada:audiovideo** + - **RadioComercial** + - **RadioComercialPlaylist** + - **radiofrance** + - **RadioFranceLive** + - **RadioFrancePodcast** + - **RadioFranceProfile** + - **RadioFranceProgramSchedule** + - **RadioJavan**: (**Currently broken**) + - **radiokapital** + - **radiokapital:show** + - **RadioZetPodcast** + - **radlive** + - **radlive:channel** + - **radlive:season** + - **Rai** + - **RaiCultura** + - **RaiNews** + - **RaiPlay** + - **RaiPlayLive** + - **RaiPlayPlaylist** + - **RaiPlaySound** + - **RaiPlaySoundLive** + - **RaiPlaySoundPlaylist** + - **RaiSudtirol** + - **RayWenderlich** + - **RayWenderlichCourse** + - **RbgTum** + - **RbgTumCourse** + - **RbgTumNewCourse** + - **RCS** + - **RCSEmbeds** + - **RCSVarious** + - **RCTIPlus** + - **RCTIPlusSeries** + - **RCTIPlusTV** + - **RDS**: RDS.ca (**Currently broken**) + - **RedBull** + - **RedBullEmbed** + - **RedBullTV** + - **RedBullTVRrnContent** + - **redcdnlivx** + - **Reddit**: [*reddit*](## "netrc machine") + - **RedGifs** + - **RedGifsSearch**: Redgifs search + - **RedGifsUser**: Redgifs user + - **RedTube** + - **RENTV**: (**Currently broken**) + - **RENTVArticle**: (**Currently broken**) + - **Restudy**: (**Currently broken**) + - **Reuters**: (**Currently broken**) + - **ReverbNation** + - **RheinMainTV** + - **RideHome** + - **RinseFM** + - **RinseFMArtistPlaylist** + - **RMCDecouverte** + - **RockstarGames**: (**Currently broken**) + - **Rokfin**: [*rokfin*](## "netrc machine") + - 
**rokfin:channel**: Rokfin Channels + - **rokfin:search**: Rokfin Search; "rkfnsearch:" prefix + - **rokfin:stack**: Rokfin Stacks + - **RoosterTeeth**: [*roosterteeth*](## "netrc machine") + - **RoosterTeethSeries**: [*roosterteeth*](## "netrc machine") + - **RottenTomatoes** + - **Rozhlas** + - **RozhlasVltava** + - **RTBF**: [*rtbf*](## "netrc machine") (**Currently broken**) + - **RTDocumentry** + - **RTDocumentryPlaylist** + - **rte**: Raidió Teilifís Éireann TV + - **rte:radio**: Raidió Teilifís Éireann radio + - **rtl.lu:article** + - **rtl.lu:tele-vod** + - **rtl.nl**: rtl.nl and rtlxl.nl + - **rtl2** + - **RTLLuLive** + - **RTLLuRadio** + - **RTNews** + - **RTP** + - **RTRFM** + - **RTS**: RTS.ch (**Currently broken**) + - **RTVCKaltura** + - **RTVCPlay** + - **RTVCPlayEmbed** + - **rtve.es:alacarta**: RTVE a la carta + - **rtve.es:audio**: RTVE audio + - **rtve.es:infantil**: RTVE infantil + - **rtve.es:live**: RTVE.es live streams + - **rtve.es:television** + - **RTVS** + - **rtvslo.si** + - **RudoVideo** + - **Rule34Video** + - **Rumble** + - **RumbleChannel** + - **RumbleEmbed** + - **Ruptly** + - **rutube**: Rutube videos + - **rutube:channel**: Rutube channel + - **rutube:embed**: Rutube embedded videos + - **rutube:movie**: Rutube movies + - **rutube:person**: Rutube person videos + - **rutube:playlist**: Rutube playlists + - **rutube:tags**: Rutube tags + - **RUTV**: RUTV.RU + - **Ruutu** + - **Ruv** + - **ruv.is:spila** + - **S4C** + - **S4CSeries** + - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video + - **safari:api**: [*safari*](## "netrc machine") + - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses + - **Saitosan**: (**Currently broken**) + - **SAKTV**: [*saktv*](## "netrc machine") + - **SAKTVLive**: [*saktv*](## "netrc machine") + - **SAKTVRecordings**: [*saktv*](## "netrc machine") + - **SaltTV**: [*salttv*](## "netrc machine") + - **SaltTVLive**: [*salttv*](## "netrc machine") + - **SaltTVRecordings**: [*salttv*](## "netrc machine") + - **SampleFocus** + - **Sangiin**: 参議院インターネット審議中継 (archive) + - **Sapo**: SAPO Vídeos + - **SBS**: sbs.com.au + - **sbs.co.kr** + - **sbs.co.kr:allvod_program** + - **sbs.co.kr:programs_vod** + - **schooltv** + - **ScienceChannel** + - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix + - **Screen9** + - **Screencast** + - **Screencastify** + - **ScreencastOMatic** + - **ScrippsNetworks** + - **scrippsnetworks:watch** + - **Scrolller** + - **SCTE**: [*scte*](## "netrc machine") (**Currently broken**) + - **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**) + - **sejm** + - **SenalColombiaLive**: (**Currently broken**) + - **SenateGov** + - **SenateISVP** + - **SendtoNews**: (**Currently broken**) + - **Servus** + - **Sexu**: (**Currently broken**) + - **SeznamZpravy** + - **SeznamZpravyArticle** + - **Shahid**: [*shahid*](## "netrc machine") + - **ShahidShow** + - **ShareVideosEmbed** + - **ShemarooMe** + - **ShowRoomLive** + - **ShugiinItvLive**: 衆議院インターネット審議中継 + - **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継) + - **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ) + - **SibnetEmbed** + - **simplecast** + - **simplecast:episode** + - **simplecast:podcast** + - **Sina** + - **Skeb** + - **sky.it** + - **sky:news** + - **sky:​news:story** + - **sky:sports** + - **sky:​sports:news** + - **SkylineWebcams**: (**Currently broken**) + - **skynewsarabia:article**: (**Currently broken**) + - **skynewsarabia:video**: (**Currently broken**) + - 
**SkyNewsAU** + - **Slideshare** + - **SlidesLive** + - **Slutload** + - **Smotrim** + - **Snotr** + - **Sohu** + - **SohuV** + - **SonyLIV**: [*sonyliv*](## "netrc machine") + - **SonyLIVSeries** + - **soundcloud**: [*soundcloud*](## "netrc machine") + - **soundcloud:playlist**: [*soundcloud*](## "netrc machine") + - **soundcloud:related**: [*soundcloud*](## "netrc machine") + - **soundcloud:search**: [*soundcloud*](## "netrc machine") Soundcloud search; "scsearch:" prefix + - **soundcloud:set**: [*soundcloud*](## "netrc machine") + - **soundcloud:trackstation**: [*soundcloud*](## "netrc machine") + - **soundcloud:user**: [*soundcloud*](## "netrc machine") + - **soundcloud:​user:permalink**: [*soundcloud*](## "netrc machine") + - **SoundcloudEmbed** + - **soundgasm** + - **soundgasm:profile** + - **southpark.cc.com** + - **southpark.cc.com:español** + - **southpark.de** + - **southpark.lat** + - **southpark.nl** + - **southparkstudios.dk** + - **SovietsCloset** + - **SovietsClosetPlaylist** + - **SpankBang** + - **SpankBangPlaylist** + - **Spiegel** + - **Sport5** + - **SportBox** + - **SportDeutschland** + - **spotify**: Spotify episodes (**Currently broken**) + - **spotify:show**: Spotify shows (**Currently broken**) + - **Spreaker** + - **SpreakerPage** + - **SpreakerShow** + - **SpreakerShowPage** + - **SpringboardPlatform** + - **Sprout** + - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**) + - **SRGSSR** + - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites + - **StacommuLive**: [*stacommu*](## "netrc machine") + - **StacommuVOD**: [*stacommu*](## "netrc machine") + - **StagePlusVODConcert**: [*stageplus*](## "netrc machine") + - **stanfordoc**: Stanford Open ClassRoom + - **StarTrek**: (**Currently broken**) + - **startv** + - **Steam** + - **SteamCommunityBroadcast** + - **Stitcher** + - **StitcherShow** + - **StoryFire** + - **StoryFireSeries** + - **StoryFireUser** + - **Streamable** + - **StreamCZ** + - **StreetVoice** + - **StretchInternet** + - **Stripchat** + - **stv:player** + - **Substack** + - **SunPorno** + - **sverigesradio:episode** + - **sverigesradio:publication** + - **SVT** + - **SVTPage** + - **SVTPlay**: SVT Play and Öppet arkiv + - **SVTSeries** + - **SwearnetEpisode** + - **Syfy**: (**Currently broken**) + - **SYVDK** + - **SztvHu** + - **t-online.de**: (**Currently broken**) + - **Tagesschau**: (**Currently broken**) + - **Tass**: (**Currently broken**) + - **TBS** + - **TBSJPEpisode** + - **TBSJPPlaylist** + - **TBSJPProgram** + - **Teachable**: [*teachable*](## "netrc machine") (**Currently broken**) + - **TeachableCourse**: [*teachable*](## "netrc machine") + - **teachertube**: teachertube.com videos (**Currently broken**) + - **teachertube:​user:collection**: teachertube.com user and collection videos (**Currently broken**) + - **TeachingChannel**: (**Currently broken**) + - **Teamcoco** + - **TeamTreeHouse**: [*teamtreehouse*](## "netrc machine") + - **techtv.mit.edu** + - **TedEmbed** + - **TedPlaylist** + - **TedSeries** + - **TedTalk** + - **Tele13** + - **Tele5**: (**Currently broken**) + - **TeleBruxelles** + - **TelecaribePlay** + - **Telecinco**: telecinco.es, cuatro.com and mediaset.es + - **Telegraaf** + - **telegram:embed** + - **TeleMB**: (**Currently broken**) + - **Telemundo**: (**Currently broken**) + - **TeleQuebec** + - **TeleQuebecEmission** + - **TeleQuebecLive** + - **TeleQuebecSquat** + - **TeleQuebecVideo** + - **TeleTask**: (**Currently broken**) + - **Telewebion** + - **Tempo** + - 
**TennisTV**: [*tennistv*](## "netrc machine") + - **TenPlay**: [*10play*](## "netrc machine") + - **TenPlaySeason** + - **TF1** + - **TFO** + - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") + - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") + - **TheGuardianPodcast** + - **TheGuardianPodcastPlaylist** + - **TheHoleTv** + - **TheIntercept** + - **ThePlatform** + - **ThePlatformFeed** + - **TheStar** + - **TheSun** + - **TheWeatherChannel** + - **ThisAmericanLife** + - **ThisOldHouse**: [*thisoldhouse*](## "netrc machine") + - **ThisVid** + - **ThisVidMember** + - **ThisVidPlaylist** + - **ThreeSpeak** + - **ThreeSpeakUser** + - **TikTok** + - **tiktok:effect**: (**Currently broken**) + - **tiktok:live** + - **tiktok:sound**: (**Currently broken**) + - **tiktok:tag**: (**Currently broken**) + - **tiktok:user**: (**Currently broken**) + - **TLC** + - **TMZ** + - **TNAFlix** + - **TNAFlixNetworkEmbed** + - **toggle** + - **toggo** + - **tokfm:audition** + - **tokfm:podcast** + - **ToonGoggles** + - **tou.tv**: [*toutv*](## "netrc machine") + - **Toypics**: Toypics video (**Currently broken**) + - **ToypicsUser**: Toypics user profile (**Currently broken**) + - **TrailerAddict**: (**Currently broken**) + - **TravelChannel** + - **Triller**: [*triller*](## "netrc machine") + - **TrillerShort** + - **TrillerUser**: [*triller*](## "netrc machine") + - **Trovo** + - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix + - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix + - **TrovoVod** + - **TrtCocukVideo** + - **TrtWorld** + - **TrueID** + - **TruNews** + - **Truth** + - **TruTV** + - **Tube8**: (**Currently broken**) + - **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at + - **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine") + - **TubiTv**: [*tubitv*](## "netrc machine") + - **TubiTvShow** + - **Tumblr**: [*tumblr*](## "netrc machine") + - **TuneInPodcast** + - **TuneInPodcastEpisode** + - **TuneInStation** + - **tv.dfb.de** + - **TV2** + - **TV2Article** + - **TV2DK** + - **TV2DKBornholmPlay** + - **tv2play.hu** + - **tv2playseries.hu** + - **TV4**: tv4.se and tv4play.se + - **TV5MondePlus**: TV5MONDE+ + - **tv5unis** + - **tv5unis:video** + - **tv8.it** + - **TVA** + - **TVANouvelles** + - **TVANouvellesArticle** + - **TVC** + - **TVCArticle** + - **TVer** + - **tvigle**: Интернет-телевидение Tvigle.ru + - **TVIPlayer** + - **tvland.com** + - **TVN24**: (**Currently broken**) + - **TVNoe**: (**Currently broken**) + - **tvopengr:embed**: tvopen.gr embedded videos + - **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos + - **tvp**: Telewizja Polska + - **tvp:embed**: Telewizja Polska + - **tvp:stream** + - **tvp:vod** + - **tvp:​vod:series** + - **TVPlayer** + - **TVPlayHome** + - **Tweakers** + - **TwitCasting** + - **TwitCastingLive** + - **TwitCastingUser** + - **twitch:clips**: [*twitch*](## "netrc machine") + - **twitch:stream**: [*twitch*](## "netrc machine") + - **twitch:vod**: [*twitch*](## "netrc machine") + - **TwitchCollection**: [*twitch*](## "netrc machine") + - **TwitchVideos**: [*twitch*](## "netrc machine") + - **TwitchVideosClips**: [*twitch*](## "netrc machine") + - **TwitchVideosCollections**: [*twitch*](## "netrc machine") + - **twitter**: [*twitter*](## "netrc machine") + - **twitter:amplify**: [*twitter*](## "netrc machine") + - **twitter:broadcast**: [*twitter*](## "netrc machine") + - **twitter:card** + - **twitter:shortener**: [*twitter*](## "netrc 
machine") + - **twitter:spaces**: [*twitter*](## "netrc machine") + - **Txxx** + - **udemy**: [*udemy*](## "netrc machine") + - **udemy:course**: [*udemy*](## "netrc machine") + - **UDNEmbed**: 聯合影音 + - **UFCArabia**: [*ufcarabia*](## "netrc machine") + - **UFCTV**: [*ufctv*](## "netrc machine") + - **ukcolumn**: (**Currently broken**) + - **UKTVPlay** + - **umg:de**: Universal Music Deutschland (**Currently broken**) + - **Unistra** + - **Unity**: (**Currently broken**) + - **uol.com.br** + - **uplynk** + - **uplynk:preplay** + - **Urort**: NRK P3 Urørt (**Currently broken**) + - **URPlay** + - **USANetwork** + - **USAToday** + - **ustream** + - **ustream:channel** + - **ustudio** + - **ustudio:embed** + - **Varzesh3**: (**Currently broken**) + - **Vbox7** + - **Veo** + - **Veoh** + - **veoh:user** + - **Vesti**: Вести.Ru (**Currently broken**) + - **Vevo** + - **VevoPlaylist** + - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet + - **vh1.com** + - **vhx:embed**: [*vimeo*](## "netrc machine") + - **vice** + - **vice:article** + - **vice:show** + - **Viddler** + - **Videa** + - **video.arnes.si**: Arnes Video + - **video.google:search**: Google Video search; "gvsearch:" prefix + - **video.sky.it** + - **video.sky.it:live** + - **VideoDetective** + - **videofy.me**: (**Currently broken**) + - **VideoKen** + - **VideoKenCategory** + - **VideoKenPlayer** + - **VideoKenPlaylist** + - **VideoKenTopic** + - **videomore** + - **videomore:season** + - **videomore:video** + - **VideoPress** + - **Vidio**: [*vidio*](## "netrc machine") + - **VidioLive**: [*vidio*](## "netrc machine") + - **VidioPremier**: [*vidio*](## "netrc machine") + - **VidLii** + - **Vidly** + - **viewlift** + - **viewlift:embed** + - **Viidea** + - **viki**: [*viki*](## "netrc machine") + - **viki:channel**: [*viki*](## "netrc machine") + - **vimeo**: [*vimeo*](## "netrc machine") + - **vimeo:album**: [*vimeo*](## "netrc machine") + - **vimeo:channel**: [*vimeo*](## "netrc machine") + - **vimeo:group**: [*vimeo*](## "netrc machine") + - **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes + - **vimeo:ondemand**: [*vimeo*](## "netrc machine") + - **vimeo:pro**: [*vimeo*](## "netrc machine") + - **vimeo:review**: [*vimeo*](## "netrc machine") Review pages on vimeo + - **vimeo:user**: [*vimeo*](## "netrc machine") + - **vimeo:watchlater**: [*vimeo*](## "netrc machine") Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication) + - **Vimm:recording** + - **Vimm:stream** + - **ViMP** + - **ViMP:Playlist** + - **Vine** + - **vine:user** + - **Viously** + - **Viqeo**: (**Currently broken**) + - **Viu** + - **viu:ott**: [*viu*](## "netrc machine") + - **viu:playlist** + - **ViuOTTIndonesia** + - **vk**: [*vk*](## "netrc machine") VK + - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos + - **vk:wallpost**: [*vk*](## "netrc machine") + - **VKPlay** + - **VKPlayLive** + - **vm.tiktok** + - **Vocaroo** + - **VODPl** + - **VODPlatform** + - **voicy**: (**Currently broken**) + - **voicy:channel**: (**Currently broken**) + - **VolejTV** + - **Voot**: [*voot*](## "netrc machine") (**Currently broken**) + - **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**) + - **VoxMedia** + - **VoxMediaVolume** + - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **vqq:series** + - **vqq:video** + - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza + - **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX + - **VTM**: (**Currently broken**) + - **VTXTV**: [*vtxtv*](## 
"netrc machine") + - **VTXTVLive**: [*vtxtv*](## "netrc machine") + - **VTXTVRecordings**: [*vtxtv*](## "netrc machine") + - **VuClip** + - **VVVVID** + - **VVVVIDShow** + - **Walla** + - **WalyTV**: [*walytv*](## "netrc machine") + - **WalyTVLive**: [*walytv*](## "netrc machine") + - **WalyTVRecordings**: [*walytv*](## "netrc machine") + - **washingtonpost** + - **washingtonpost:article** + - **wat.tv** + - **WatchESPN** + - **WDR** + - **wdr:mobile**: (**Currently broken**) + - **WDRElefant** + - **WDRPage** + - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix + - **Webcamerapl** + - **Webcaster** + - **WebcasterFeed** + - **WebOfStories** + - **WebOfStoriesPlaylist** + - **Weibo** + - **WeiboUser** + - **WeiboVideo** + - **WeiqiTV**: WQTV (**Currently broken**) + - **wetv:episode** + - **WeTvSeries** + - **Weverse**: [*weverse*](## "netrc machine") + - **WeverseLive**: [*weverse*](## "netrc machine") + - **WeverseLiveTab**: [*weverse*](## "netrc machine") + - **WeverseMedia**: [*weverse*](## "netrc machine") + - **WeverseMediaTab**: [*weverse*](## "netrc machine") + - **WeverseMoment**: [*weverse*](## "netrc machine") + - **WeVidi** + - **Weyyak** + - **whowatch** + - **Whyp** + - **wikimedia.org** + - **Wimbledon** + - **WimTV** + - **WinSportsVideo** + - **Wistia** + - **WistiaChannel** + - **WistiaPlaylist** + - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **wordpress:mb.miniAudioPlayer** + - **wordpress:playlist** + - **WorldStarHipHop** + - **wppilot** + - **wppilot:channels** + - **WrestleUniversePPV**: [*wrestleuniverse*](## "netrc machine") + - **WrestleUniverseVOD**: [*wrestleuniverse*](## "netrc machine") + - **WSJ**: Wall Street Journal + - **WSJArticle** + - **WWE** + - **wyborcza:video** + - **WyborczaPodcast** + - **wykop:dig** + - **wykop:​dig:comment** + - **wykop:post** + - **wykop:​post:comment** + - **Xanimu** + - **XboxClips** + - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing + - **XHamster** + - **XHamsterEmbed** + - **XHamsterUser** + - **ximalaya**: 喜马拉雅FM + - **ximalaya:album**: 喜马拉雅FM 专辑 + - **xinpianchang**: xinpianchang.com (**Currently broken**) + - **XMinus**: (**Currently broken**) + - **XNXX** + - **Xstream** + - **XVideos** + - **xvideos:quickies** + - **XXXYMovies** + - **Yahoo**: Yahoo screen and movies + - **yahoo:japannews**: Yahoo! Japan News + - **YandexDisk** + - **yandexmusic:album**: Яндекс.Музыка - Альбом + - **yandexmusic:​artist:albums**: Яндекс.Музыка - Артист - Альбомы + - **yandexmusic:​artist:tracks**: Яндекс.Музыка - Артист - Треки + - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист + - **yandexmusic:track**: Яндекс.Музыка - Трек + - **YandexVideo** + - **YandexVideoPreview** + - **YapFiles**: (**Currently broken**) + - **Yappy**: (**Currently broken**) + - **YappyProfile** + - **YleAreena** + - **YouJizz** + - **youku**: 优酷 + - **youku:show** + - **YouNowChannel** + - **YouNowLive** + - **YouNowMoment** + - **YouPorn** + - **YourPorn** + - **YourUpload** + - **youtube**: YouTube + - **youtube:clip** + - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) + - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) + - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. 
#songs
+ - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies)
+ - **youtube:playlist**: YouTube playlists
+ - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
+ - **youtube:search**: YouTube search; "ytsearch:" prefix
+ - **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
+ - **youtube:search_url**: YouTube search URLs with sorting and filter support
+ - **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
+ - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
+ - **youtube:tab**: YouTube Tabs
+ - **youtube:user**: YouTube user videos; "ytuser:" prefix
+ - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
+ - **YoutubeLivestreamEmbed**: YouTube livestream embeds
+ - **YoutubeYtBe**: youtu.be
+ - **Zaiko**
+ - **ZaikoETicket**
+ - **Zapiks**
+ - **Zattoo**: [*zattoo*](## "netrc machine")
+ - **ZattooLive**: [*zattoo*](## "netrc machine")
+ - **ZattooMovies**: [*zattoo*](## "netrc machine")
+ - **ZattooRecordings**: [*zattoo*](## "netrc machine")
+ - **ZDF**
+ - **ZDFChannel**
+ - **Zee5**: [*zee5*](## "netrc machine")
+ - **zee5:series**
+ - **ZeeNews**: (**Currently broken**)
+ - **ZenPorn**
+ - **ZenYandex**
+ - **ZenYandexChannel**
+ - **ZetlandDKArticle**
+ - **Zhihu**
+ - **zingmp3**: zingmp3.vn
+ - **zingmp3:album**
+ - **zingmp3:chart-home**
+ - **zingmp3:chart-music-video**
+ - **zingmp3:hub**
+ - **zingmp3:liveradio**
+ - **zingmp3:podcast**
+ - **zingmp3:podcast-episode**
+ - **zingmp3:user**
+ - **zingmp3:week-chart**
+ - **zoom**
+ - **Zype**
+ - **generic**: Generic downloader that works on some sites
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 0000000..2fbc269
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,26 @@
+import functools
+import inspect
+
+import pytest
+
+from yt_dlp.networking import RequestHandler
+from yt_dlp.networking.common import _REQUEST_HANDLERS
+from yt_dlp.utils._utils import _YDLLogger as FakeLogger
+
+
+@pytest.fixture
+def handler(request):
+    RH_KEY = request.param
+    if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
+        handler = RH_KEY
+    elif RH_KEY in _REQUEST_HANDLERS:
+        handler = _REQUEST_HANDLERS[RH_KEY]
+    else:
+        pytest.skip(f'{RH_KEY} request handler is not available')
+
+    return functools.partial(handler, logger=FakeLogger)
+
+
+def validate_and_send(rh, req):
+    rh.validate(req)
+    return rh.send(req)
diff --git a/test/helper.py b/test/helper.py
new file mode 100644
index 0000000..7760fd8
--- /dev/null
+++ b/test/helper.py
@@ -0,0 +1,340 @@
+import errno
+import hashlib
+import json
+import os.path
+import re
+import ssl
+import sys
+import types
+
+import yt_dlp.extractor
+from yt_dlp import YoutubeDL
+from yt_dlp.compat import compat_os_name
+from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
+
+if 'pytest' in sys.modules:
+    import pytest
+    is_download_test = pytest.mark.download
+else:
+    def is_download_test(testClass):
+        return testClass
+
+
+def get_params(override=None):
+    PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                   'parameters.json')
+    LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                         'local_parameters.json')
+    with open(PARAMETERS_FILE, encoding='utf-8') as pf:
+        parameters = json.load(pf)
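+    # Optional per-checkout overrides from test/local_parameters.json are merged over the defaults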
+    if os.path.exists(LOCAL_PARAMETERS_FILE):
+        with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+            parameters.update(json.load(pf))
+    if override:
+        parameters.update(override)
+    return parameters
+
+
+def try_rm(filename):
+    """ Remove a file if it exists """
+    try:
+        os.remove(filename)
+    except OSError as ose:
+        if ose.errno != errno.ENOENT:
+            raise
+
+
+def report_warning(message, *args, **kwargs):
+    '''
+    Print the message to stderr; it will be prefixed with 'WARNING:'.
+    If stderr is a tty, the 'WARNING:' prefix will be colored.
+    '''
+    if sys.stderr.isatty() and compat_os_name != 'nt':
+        _msg_header = '\033[0;33mWARNING:\033[0m'
+    else:
+        _msg_header = 'WARNING:'
+    output = f'{_msg_header} {message}\n'
+    if 'b' in getattr(sys.stderr, 'mode', ''):
+        output = output.encode(preferredencoding())
+    sys.stderr.write(output)
+
+
+class FakeYDL(YoutubeDL):
+    def __init__(self, override=None):
+        # Different instances of the downloader can't share the same dictionary;
+        # some tests set the "sublang" parameter, which would break the md5 checks.
+        params = get_params(override=override)
+        super().__init__(params, auto_init=False)
+        self.result = []
+
+    def to_screen(self, s, *args, **kwargs):
+        print(s)
+
+    def trouble(self, s, *args, **kwargs):
+        raise Exception(s)
+
+    def download(self, x):
+        self.result.append(x)
+
+    def expect_warning(self, regex):
+        # Silence an expected warning matching a regex
+        old_report_warning = self.report_warning
+
+        def report_warning(self, message, *args, **kwargs):
+            if re.match(regex, message):
+                return
+            old_report_warning(message, *args, **kwargs)
+        self.report_warning = types.MethodType(report_warning, self)
+
+
+def gettestcases(include_onlymatching=False):
+    for ie in yt_dlp.extractor.gen_extractors():
+        yield from ie.get_testcases(include_onlymatching)
+
+
+def getwebpagetestcases():
+    for ie in yt_dlp.extractor.gen_extractors():
+        for tc in ie.get_webpage_testcases():
+            tc.setdefault('add_ie', []).append('Generic')
+            yield tc
+
+
+md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
+
+
+def expect_value(self, got, expected, field):
+    if isinstance(expected, str) and expected.startswith('re:'):
+        match_str = expected[len('re:'):]
+        match_rex = re.compile(match_str)
+
+        self.assertTrue(
+            isinstance(got, str),
+            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
+        self.assertTrue(
+            match_rex.match(got),
+            f'field {field} (value: {got!r}) should match {match_str!r}')
+    elif isinstance(expected, str) and expected.startswith('startswith:'):
+        start_str = expected[len('startswith:'):]
+        self.assertTrue(
+            isinstance(got, str),
+            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
+        self.assertTrue(
+            got.startswith(start_str),
+            f'field {field} (value: {got!r}) should start with {start_str!r}')
+    elif isinstance(expected, str) and expected.startswith('contains:'):
+        contains_str = expected[len('contains:'):]
+        self.assertTrue(
+            isinstance(got, str),
+            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
+        self.assertTrue(
+            contains_str in got,
+            f'field {field} (value: {got!r}) should contain {contains_str!r}')
+    elif isinstance(expected, type):
+        self.assertTrue(
+            isinstance(got, expected),
+            f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}')
+    elif isinstance(expected, dict) and isinstance(got, dict):
+        expect_dict(self, got, expected)
+    elif isinstance(expected, list) and isinstance(got, list):
+        self.assertEqual(
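+            # lists must be the same length before their items are compared pairwise below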
+            len(expected), len(got),
+            'Expect a list of length %d, but got a list of length %d for field %s' % (
+                len(expected), len(got), field))
+        for index, (item_got, item_expected) in enumerate(zip(got, expected)):
+            type_got = type(item_got)
+            type_expected = type(item_expected)
+            self.assertEqual(
+                type_expected, type_got,
+                'Type mismatch for list item at index %d for field %s, expected %r, got %r' % (
+                    index, field, type_expected, type_got))
+            expect_value(self, item_got, item_expected, field)
+    else:
+        if isinstance(expected, str) and expected.startswith('md5:'):
+            self.assertTrue(
+                isinstance(got, str),
+                f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
+            got = 'md5:' + md5(got)
+        elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
+            self.assertTrue(
+                isinstance(got, (list, dict)),
+                f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
+            op, _, expected_num = expected.partition(':')
+            expected_num = int(expected_num)
+            if op == 'mincount':
+                assert_func = assertGreaterEqual
+                msg_tmpl = 'Expected %d items in field %s, but only got %d'
+            elif op == 'maxcount':
+                assert_func = assertLessEqual
+                msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
+            elif op == 'count':
+                assert_func = assertEqual
+                msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
+            else:
+                assert False
+            assert_func(
+                self, len(got), expected_num,
+                msg_tmpl % (expected_num, field, len(got)))
+            return
+        self.assertEqual(
+            expected, got,
+            f'Invalid value for field {field}, expected {expected!r}, got {got!r}')
+
+
+def expect_dict(self, got_dict, expected_dict):
+    for info_field, expected in expected_dict.items():
+        got = got_dict.get(info_field)
+        expect_value(self, got, expected, info_field)
+
+
+def sanitize_got_info_dict(got_dict):
+    IGNORED_FIELDS = (
+        *YoutubeDL._format_fields,
+
+        # Lists
+        'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',
+
+        # Auto-generated
+        'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch', 'n_entries',
+        'fulltitle', 'extractor', 'extractor_key', 'filename', 'filepath', 'infojson_filename', 'original_url',
+
+        # Only live_status needs to be checked
+        'is_live', 'was_live',
+    )
+
+    IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')
+
+    def sanitize(key, value):
+        if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
+            return f'md5:{md5(value)}'
+        elif isinstance(value, list) and len(value) > 10:
+            return f'count:{len(value)}'
+        elif key.endswith('_count') and isinstance(value, int):
+            return int
+        return value
+
+    test_info_dict = {
+        key: sanitize(key, value) for key, value in got_dict.items()
+        if value is not None and key not in IGNORED_FIELDS and (
+            not any(key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
+            or key == '_old_archive_ids')
+    }
+
+    # display_id may be generated from id
+    if test_info_dict.get('display_id') == test_info_dict.get('id'):
+        test_info_dict.pop('display_id')
+
+    # Remove deprecated fields
+    for old in YoutubeDL._deprecated_multivalue_fields.keys():
+        test_info_dict.pop(old, None)
+
+    # release_year may be generated from release_date
+    if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
+        test_info_dict.pop('release_year')
+
+    # Check url for flat entries
+    if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'):
+        test_info_dict['url'] = got_dict['url']
+
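+    # The sanitized dict is what expect_info_dict() compares against a test case's 'info_dict'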
+    return test_info_dict
+
+
+def expect_info_dict(self, got_dict, expected_dict):
+    expect_dict(self, got_dict, expected_dict)
+    # Check for the presence of mandatory fields
+    if got_dict.get('_type') not in ('playlist', 'multi_video'):
+        mandatory_fields = ['id', 'title']
+        if expected_dict.get('ext'):
+            mandatory_fields.extend(('url', 'ext'))
+        for key in mandatory_fields:
+            self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+    # Check for mandatory fields that are automatically set by YoutubeDL
+    if got_dict.get('_type', 'video') == 'video':
+        for key in ['webpage_url', 'extractor', 'extractor_key']:
+            self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
+
+    test_info_dict = sanitize_got_info_dict(got_dict)
+
+    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
+    if missing_keys:
+        def _repr(v):
+            if isinstance(v, str):
+                return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
+            elif isinstance(v, type):
+                return v.__name__
+            else:
+                return repr(v)
+        info_dict_str = ''.join(
+            f'    {_repr(k)}: {_repr(v)},\n'
+            for k, v in test_info_dict.items() if k not in missing_keys)
+        if info_dict_str:
+            info_dict_str += '\n'
+        info_dict_str += ''.join(
+            f'    {_repr(k)}: {_repr(test_info_dict[k])},\n'
+            for k in missing_keys)
+        info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n'
+        write_string(info_dict_str.replace('\n', '\n        '), out=sys.stderr)
+        self.assertFalse(
+            missing_keys,
+            'Missing keys in test definition: %s' % (
+                ', '.join(sorted(missing_keys))))
+
+
+def assertRegexpMatches(self, text, regexp, msg=None):
+    if hasattr(self, 'assertRegexp'):
+        return self.assertRegexp(text, regexp, msg)
+    else:
+        m = re.match(regexp, text)
+        if not m:
+            note = 'Regexp didn\'t match: %r not found' % (regexp)
+            if len(text) < 1000:
+                note += ' in %r' % text
+            if msg is None:
+                msg = note
+            else:
+                msg = note + ', ' + msg
+        self.assertTrue(m, msg)
+
+
+def assertGreaterEqual(self, got, expected, msg=None):
+    if not (got >= expected):
+        if msg is None:
+            msg = f'{got!r} not greater than or equal to {expected!r}'
+        self.assertTrue(got >= expected, msg)
+
+
+def assertLessEqual(self, got, expected, msg=None):
+    if not (got <= expected):
+        if msg is None:
+            msg = f'{got!r} not less than or equal to {expected!r}'
+        self.assertTrue(got <= expected, msg)
+
+
+def assertEqual(self, got, expected, msg=None):
+    if not (got == expected):
+        if msg is None:
+            msg = f'{got!r} not equal to {expected!r}'
+        self.assertTrue(got == expected, msg)
+
+
+def expect_warnings(ydl, warnings_re):
+    real_warning = ydl.report_warning
+
+    def _report_warning(w, *args, **kwargs):
+        if not any(re.search(w_re, w) for w_re in warnings_re):
+            real_warning(w, *args, **kwargs)
+
+    ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+        # In Jython SSLSocket is not a subclass of socket.socket
+        sock = httpd.socket.sock
+    else:
+        sock = httpd.socket
+    return sock.getsockname()[1]
+
+
+def verify_address_availability(address):
+    if find_available_port(address) is None:
+        pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
diff --git a/test/parameters.json b/test/parameters.json
new file mode 100644
index 0000000..8789ce1
--- /dev/null
+++ b/test/parameters.json
@@ -0,0 +1,49 @@
+{
+    "check_formats": false,
+    "consoletitle": false,
+    "continuedl": true,
+    "forcedescription": false,
+    "forcefilename": false,
+    "forceformat": false,
+    "forcethumbnail": false,
+    "forcetitle": false,
+    "forceurl": false,
+    "force_write_download_archive": false,
+    "format": "b/bv",
+    "ignoreerrors": false,
+    "listformats": null,
+    "logtostderr": false,
+    "matchtitle": null,
+    "max_downloads": null,
+    "overwrites": null,
+    "nopart": false,
+    "noprogress": false,
+    "outtmpl": "%(id)s.%(ext)s",
+    "password": null,
+    "playliststart": 1,
+    "prefer_free_formats": false,
+    "quiet": false,
+    "ratelimit": null,
+    "rejecttitle": null,
+    "retries": 10,
+    "simulate": false,
+    "subtitleslang": null,
+    "subtitlesformat": "best",
+    "test": true,
+    "updatetime": true,
+    "usenetrc": false,
+    "username": null,
+    "verbose": true,
+    "writedescription": false,
+    "writeinfojson": true,
+    "writeannotations": false,
+    "writelink": false,
+    "writeurllink": false,
+    "writewebloclink": false,
+    "writedesktoplink": false,
+    "writesubtitles": false,
+    "allsubtitles": false,
+    "listsubtitles": false,
+    "fixup": "never",
+    "allow_playlist_files": false
+}
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
new file mode 100644
index 0000000..b7dee49
--- /dev/null
+++ b/test/test_InfoExtractor.py
@@ -0,0 +1,1911 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import http.server
+import threading
+
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from yt_dlp.compat import compat_etree_fromstring
+from yt_dlp.extractor import YoutubeIE, get_info_extractor
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+    ExtractorError,
+    RegexNotFoundError,
+    encode_data_uri,
+    strip_jsonp,
+)
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
+    def log_message(self, format, *args):
+        pass
+
+    def do_GET(self):
+        if self.path == '/teapot':
+            self.send_response(TEAPOT_RESPONSE_STATUS)
+            self.send_header('Content-Type', 'text/html; charset=utf-8')
+            self.end_headers()
+            self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+        else:
+            assert False
+
+
+class DummyIE(InfoExtractor):
+    def _sort_formats(self, formats, field_preference=[]):
+        self._downloader.sort_formats(
+            {'formats': formats, '_format_sort_fields': field_preference})
+
+
+class TestInfoExtractor(unittest.TestCase):
+    def setUp(self):
+        self.ie = DummyIE(FakeYDL())
+
+    def test_ie_key(self):
+        self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
+
+    def test_html_search_regex(self):
+        html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
+        search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
+        self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
+
+    def test_opengraph(self):
+        ie = self.ie
+        html = '''
+            <meta name="og:title" content='Foo'/>
+            <meta content="Some video's description " name="og:description"/>
+            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
+            <meta content='application/x-shockwave-flash' property='og:video:type'>
+            <meta content='Foo' property=og:foobar>
+            <meta name="og:test1" content='foo > < bar'/>
+            <meta name="og:test2" content="foo >//< bar"/>
+            <meta property=og-test3 content='Ill-formatted opengraph'/>
+            <meta property=og:test4 content=unquoted-value/>
+            '''
+        self.assertEqual(ie._og_search_title(html), 'Foo')
+        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
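+        # The _og_search_* helpers must tolerate reversed attribute order, unquoted
+        # values and non-standard property names, as exercised by the html above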
+        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
+        self.assertEqual(ie._og_search_video_url(html, default=None), None)
+        self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
+        self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
+        self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
+        self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
+        self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
+        self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
+        self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
+        self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
+
+    def test_html_search_meta(self):
+        ie = self.ie
+        html = '''
+            <meta name="a" content="1" />
+            <meta name='b' content='2'>
+            <meta name="c" content='3'>
+            <meta name=d content='4'>
+            <meta property="e" content='5' >
+            <meta content="6" name="f">
+        '''
+
+        self.assertEqual(ie._html_search_meta('a', html), '1')
+        self.assertEqual(ie._html_search_meta('b', html), '2')
+        self.assertEqual(ie._html_search_meta('c', html), '3')
+        self.assertEqual(ie._html_search_meta('d', html), '4')
+        self.assertEqual(ie._html_search_meta('e', html), '5')
+        self.assertEqual(ie._html_search_meta('f', html), '6')
+        self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
+        self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
+        self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
+        self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
+        self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
+
+    def test_search_json_ld_realworld(self):
+        _TESTS = [
+            # https://github.com/ytdl-org/youtube-dl/issues/23306
+            (
+                r'''<script type="application/ld+json">
+{
+"@context": "http://schema.org/",
+"@type": "VideoObject",
+"name": "1 On 1 With Kleio",
+"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
+"duration": "PT0H12M23S",
+"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
+"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
+"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
+"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
+"width": "1920",
+"height": "1080",
+"encodingFormat": "mp4",
+"bitrate": "6617kbps",
+"isFamilyFriendly": "False",
+"description": "Kleio Valentien",
+"uploadDate": "2015-12-05T21:24:35+01:00",
+"interactionStatistic": {
+"@type": "InteractionCounter",
+"interactionType": { "@type": "http://schema.org/WatchAction" },
+"userInteractionCount": 1120958
+}, "aggregateRating": {
+"@type": "AggregateRating",
+"ratingValue": "88",
+"ratingCount": "630",
+"bestRating": "100",
+"worstRating": "0"
+}, "actor": [{
+"@type": "Person",
+"name": "Kleio Valentien",
+"url": "https://www.eporner.com/pornstar/kleio-valentien/"
+}]}
+                </script>''',
+                {
+                    'title': '1 On 1 With Kleio',
+                    'description': 'Kleio Valentien',
+                    'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
+                    'timestamp': 1449347075,
+                    'duration': 743.0,
+                    'view_count': 1120958,
+                    'width': 1920,
+                    'height': 1080,
+                },
+                {},
+            ),
+            (
+                r'''<script type="application/ld+json">
+                    {
"@context": "https://schema.org", + "@graph": [ + { + "@type": "NewsArticle", + "mainEntityOfPage": { + "@type": "WebPage", + "@id": "https://www.ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn" + }, + "headline": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν", + "name": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν", + "description": "Τα παιδιά δέχθηκαν την επίθεση επειδή αρνήθηκαν να γίνουν μέλη της συμμορίας, ανέφερε ο Γ. Ζαχαρόπουλος.", + "image": { + "@type": "ImageObject", + "url": "https://ant1media.azureedge.net/imgHandler/1100/a635c968-be71-447c-bf9c-80d843ece21e.jpg", + "width": 1100, + "height": 756 }, + "datePublished": "2021-11-10T08:50:00+03:00", + "dateModified": "2021-11-10T08:52:53+03:00", + "author": { + "@type": "Person", + "@id": "https://www.ant1news.gr/", + "name": "Ant1news", + "image": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png", + "url": "https://www.ant1news.gr/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.ant1news.gr#publisher", + "name": "Ant1news", + "url": "https://www.ant1news.gr", + "logo": { + "@type": "ImageObject", + "url": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png", + "width": 400, + "height": 400 }, + "sameAs": [ + "https://www.facebook.com/Ant1news.gr", + "https://twitter.com/antennanews", + "https://www.youtube.com/channel/UC0smvAbfczoN75dP0Hw4Pzw", + "https://www.instagram.com/ant1news/" + ] + }, + + "keywords": "μαχαίρωμα,συμμορία ανηλίκων,ΕΙΔΗΣΕΙΣ,ΕΙΔΗΣΕΙΣ ΣΗΜΕΡΑ,ΝΕΑ,Κοινωνία - Ant1news", + + + "articleSection": "Κοινωνία" + } + ] + } + </script>''', + { + 'timestamp': 1636523400, + 'title': 'md5:91fe569e952e4d146485740ae927662b', + }, + {'expected_type': 'NewsArticle'}, + ), + ( + r'''<script type="application/ld+json"> + {"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/", + "name":"Het journaal 19u", + "description":"Het journaal 19u van vrijdag 31 december 2021.", + "potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"}, + "mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"}, + "publication":[{ + "startDate":"2021-12-31T19:00:00.000+01:00", + "endDate":"2022-01-30T23:55:00.000+01:00", + "publishedBy":{"name":"een","@type":"Organization"}, + "publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"}, + "@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8", + "@type":"BroadcastEvent" + }], + "video":{ + "name":"Het journaal - Aflevering 365 (Seizoen 2021)", + "description":"Het journaal 19u van vrijdag 31 december 2021. 
Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.", + "thumbnailUrl":"//images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg", + "expires":"2022-01-30T23:55:00.000+01:00", + "hasPart":[ + {"name":"Explosie Turnhout","startOffset":70,"@type":"Clip"}, + {"name":"Jaarwisseling","startOffset":440,"@type":"Clip"}, + {"name":"Natuurbranden Colorado","startOffset":1179,"@type":"Clip"}, + {"name":"Klimaatverandering","startOffset":1263,"@type":"Clip"}, + {"name":"Zacht weer","startOffset":1367,"@type":"Clip"}, + {"name":"Financiële balans","startOffset":1383,"@type":"Clip"}, + {"name":"Club Brugge","startOffset":1484,"@type":"Clip"}, + {"name":"Mentale gezondheid bij topsporters","startOffset":1575,"@type":"Clip"}, + {"name":"Olympische Winterspelen","startOffset":1728,"@type":"Clip"}, + {"name":"Sober oudjaar in Nederland","startOffset":1873,"@type":"Clip"} + ], + "duration":"PT34M39.23S", + "uploadDate":"2021-12-31T19:00:00.000+01:00", + "@id":"vid-9457d0c6-b8ac-4aba-b5e1-15aa3a3295b5", + "@type":"VideoObject" + }, + "genre":["Nieuws en actua"], + "episodeNumber":365, + "partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"}, + "partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"}, + "@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script> + ''', + { + 'chapters': [ + {"title": "Explosie Turnhout", "start_time": 70, "end_time": 440}, + {"title": "Jaarwisseling", "start_time": 440, "end_time": 1179}, + {"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263}, + {"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367}, + {"title": "Zacht weer", "start_time": 1367, "end_time": 1383}, + {"title": "Financiële balans", "start_time": 1383, "end_time": 1484}, + {"title": "Club Brugge", "start_time": 1484, "end_time": 1575}, + {"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728}, + {"title": "Olympische Winterspelen", "start_time": 1728, "end_time": 1873}, + {"title": "Sober oudjaar in Nederland", "start_time": 1873, "end_time": 2079.23} + ], + 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)' + }, {} + ), + ( + # test multiple thumbnails in a list + r''' +<script type="application/ld+json"> +{"@context":"https://schema.org", +"@type":"VideoObject", +"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]} +</script>''', + { + 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], + }, + {}, + ), + ( + # test single thumbnail + r''' +<script type="application/ld+json"> +{"@context":"https://schema.org", +"@type":"VideoObject", +"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"} +</script>''', + { + 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], + }, + {}, + ) + ] + for html, expected_dict, search_json_ld_kwargs in _TESTS: + expect_dict( + self, + self.ie._search_json_ld(html, None, **search_json_ld_kwargs), + expected_dict + ) + + def test_download_json(self): + uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') + self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) + uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript') + self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'}) + uri = 
encode_data_uri(b'{"foo": invalid}', 'application/json') + self.assertRaises(ExtractorError, self.ie._download_json, uri, None) + self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + + def test_parse_html5_media_entries(self): + # inline video tag + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://127.0.0.1/video.html', + r'<html><video src="/vid.mp4" /></html>', None)[0], + { + 'formats': [{ + 'url': 'https://127.0.0.1/vid.mp4', + }], + }) + + # from https://www.r18.com/ + # with kpbs in label + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.r18.com/', + r''' + <video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg"> + <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4" res="240" label="300kbps"> + <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4" res="480" label="1000kbps"> + <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4" res="740" label="1500kbps"> + <p>Your browser does not support the video tag.</p> + </video> + ''', None)[0], + { + 'formats': [{ + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4', + 'ext': 'mp4', + 'format_id': '300kbps', + 'height': 240, + 'tbr': 300, + }, { + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4', + 'ext': 'mp4', + 'format_id': '1000kbps', + 'height': 480, + 'tbr': 1000, + }, { + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4', + 'ext': 'mp4', + 'format_id': '1500kbps', + 'height': 740, + 'tbr': 1500, + }], + 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg' + }) + + # from https://www.csfd.cz/ + # with width and height + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.csfd.cz/', + r''' + <video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" > + <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360"> + <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720"> + <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080"> + <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360"> + <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720"> + <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080"> + <track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs"> + </video> + ''', None)[0], + { + 'formats': [{ + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4', + 'ext': 'mp4', + 'width': 640, + 'height': 360, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4', + 'ext': 'mp4', + 'width': 
1280, + 'height': 720, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4', + 'ext': 'mp4', + 'width': 1920, + 'height': 1080, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm', + 'ext': 'webm', + 'width': 640, + 'height': 360, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm', + 'ext': 'webm', + 'width': 1280, + 'height': 720, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm', + 'ext': 'webm', + 'width': 1920, + 'height': 1080, + }], + 'subtitles': { + 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}] + }, + 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360' + }) + + # from https://tamasha.com/v/Kkdjw + # with height in label + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://tamasha.com/v/Kkdjw', + r''' + <video crossorigin="anonymous"> + <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/> + <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" + label="240p" res="240"/> + <source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4" + label="144p" res="144"/> + </video> + ''', None)[0], + { + 'formats': [{ + 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', + }, { + 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', + 'ext': 'mp4', + 'format_id': '240p', + 'height': 240, + }, { + 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4', + 'ext': 'mp4', + 'format_id': '144p', + 'height': 144, + }] + }) + + # from https://www.directvnow.com + # with data-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.directvnow.com', + r''' + <video id="vid1" class="header--video-masked active" muted playsinline> + <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" /> + </video> + ''', None)[0], + { + 'formats': [{ + 'ext': 'mp4', + 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', + }] + }) + + # from https://www.klarna.com/uk/ + # with data-video-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.klarna.com/uk/', + r''' + <video loop autoplay muted class="responsive-video block-kl__video video-on-medium"> + <source src="" data-video-desktop 
data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" /> + </video> + ''', None)[0], + { + 'formats': [{ + 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4', + 'ext': 'mp4', + }], + }) + + # from https://0000.studio/ + # with type attribute but without extension in URL + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://0000.studio', + r''' + <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92" + controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain"> + </video> + ''', None)[0], + { + 'formats': [{ + 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92', + 'ext': 'mp4', + }], + }) + + def test_extract_jwplayer_data_realworld(self): + # from http://www.suffolk.edu/sjc/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + <script type='text/javascript'> + jwplayer('my-video').setup({ + file: 'rtmp://192.138.214.154/live/sjclive', + fallback: 'true', + width: '95%', + aspectratio: '16:9', + primary: 'flash', + mediaid:'XEgvuql4' + }); + </script> + ''', None, require_title=False), + { + 'id': 'XEgvuql4', + 'formats': [{ + 'url': 'rtmp://192.138.214.154/live/sjclive', + 'ext': 'flv' + }] + }) + + # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' +<script type="text/javascript"> + jwplayer("mediaplayer").setup({ + 'videoid': "7564", + 'width': "100%", + 'aspectratio': "16:9", + 'stretching': "exactfit", + 'autostart': 'false', + 'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf", + 'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv", + 'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg", + 'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4", + 'logo.hide': true, + 'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip", + 'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf", + 'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370", + 'controlbar': 'bottom', + 'modes': [ + {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'} + ], + 'provider': 'http' + }); + //noinspection JSAnnotator + invideo.setup({ + adsUrl: "/banner-iframe/?zoneId=32", + adsUrl2: "", + autostart: false + }); +</script> + ''', 'dummy', require_title=False), + { + 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', + 'formats': [{ + 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', + 'ext': 'flv' + 
}] + }) + + # from http://www.indiedb.com/games/king-machine/videos + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' +<script> +jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) { + videoAnalytics("play"); +}).once("complete", function(event) { + videoAnalytics("completed"); +}); +</script> + ''', 'dummy'), + { + 'title': 'king machine trailer 1', + 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg', + 'formats': [{ + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', + 'height': 360, + 'ext': 'mp4' + }, { + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', + 'height': 720, + 'ext': 'mp4' + }] + }) + + def test_parse_m3u8_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/11995 + # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor + 'img_bipbop_adv_example_fmp4', + 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + [{ + 'format_id': 'aud1-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'language': 'en', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'audio_ext': 'mp4', + }, { + 'format_id': 'aud2-English', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'language': 'en', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'audio_ext': 'mp4', + }, { + 'format_id': 'aud3-English', + 'url': 
'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'language': 'en', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'audio_ext': 'mp4', + }, { + 'format_id': '530', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 480, + 'height': 270, + 'vcodec': 'avc1.640015', + }, { + 'format_id': '561', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 480, + 'height': 270, + 'vcodec': 'avc1.640015', + }, { + 'format_id': '753', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 480, + 'height': 270, + 'vcodec': 'avc1.640015', + }, { + 'format_id': '895', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 640, + 'height': 360, + 'vcodec': 'avc1.64001e', + }, { + 'format_id': '926', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 640, + 'height': 360, + 'vcodec': 'avc1.64001e', + }, { + 'format_id': '1118', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 640, + 'height': 360, + 'vcodec': 'avc1.64001e', + }, { + 'format_id': '1265', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 768, + 'height': 432, + 'vcodec': 'avc1.64001e', + }, { + 'format_id': '1295', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 768, + 'height': 432, + 'vcodec': 'avc1.64001e', + }, { + 'format_id': '1487', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', + 'manifest_url': 
'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 768, + 'height': 432, + 'vcodec': 'avc1.64001e', + }, { + 'format_id': '2168', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 960, + 'height': 540, + 'vcodec': 'avc1.640020', + }, { + 'format_id': '2198', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 960, + 'height': 540, + 'vcodec': 'avc1.640020', + }, { + 'format_id': '2390', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 960, + 'height': 540, + 'vcodec': 'avc1.640020', + }, { + 'format_id': '3168', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1280, + 'height': 720, + 'vcodec': 'avc1.640020', + }, { + 'format_id': '3199', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1280, + 'height': 720, + 'vcodec': 'avc1.640020', + }, { + 'format_id': '3391', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1280, + 'height': 720, + 'vcodec': 'avc1.640020', + }, { + 'format_id': '4670', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '4701', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '4893', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 
'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '6170', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '6200', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '6392', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '7968', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '7998', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }, { + 'format_id': '8190', + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.64002a', + }], + {} + ), + ( + 'bipbop_16x9', + 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + [{ + 'format_id': 'bipbop_audio-BipBop Audio 2', + 'format_index': None, + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/alternate_audio_aac/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + 'language': 'eng', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'preference': None, + 'quality': None, + 'vcodec': 'none', + 'audio_ext': 'mp4', + 'video_ext': 'none', + }, { + 'format_id': '41', + 'format_index': None, + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear0/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + 'tbr': 41.457, + 'ext': 'mp4', + 'fps': None, + 'protocol': 'm3u8_native', + 'preference': None, + 'quality': None, + 'vcodec': 'none', + 'acodec': 'mp4a.40.2', + 'audio_ext': 'mp4', + 'video_ext': 'none', + 'abr': 41.457, + }, { + 'format_id': '263', + 'format_index': None, + 'url': 
'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear1/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + 'tbr': 263.851, + 'ext': 'mp4', + 'fps': None, + 'protocol': 'm3u8_native', + 'preference': None, + 'quality': None, + 'width': 416, + 'height': 234, + 'vcodec': 'avc1.4d400d', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + }, { + 'format_id': '577', + 'format_index': None, + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear2/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + 'tbr': 577.61, + 'ext': 'mp4', + 'fps': None, + 'protocol': 'm3u8_native', + 'preference': None, + 'quality': None, + 'width': 640, + 'height': 360, + 'vcodec': 'avc1.4d401e', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + }, { + 'format_id': '915', + 'format_index': None, + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear3/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + 'tbr': 915.905, + 'ext': 'mp4', + 'fps': None, + 'protocol': 'm3u8_native', + 'preference': None, + 'quality': None, + 'width': 960, + 'height': 540, + 'vcodec': 'avc1.4d401f', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + }, { + 'format_id': '1030', + 'format_index': None, + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear4/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + 'tbr': 1030.138, + 'ext': 'mp4', + 'fps': None, + 'protocol': 'm3u8_native', + 'preference': None, + 'quality': None, + 'width': 1280, + 'height': 720, + 'vcodec': 'avc1.4d401f', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + }, { + 'format_id': '1924', + 'format_index': None, + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear5/prog_index.m3u8', + 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', + 'tbr': 1924.009, + 'ext': 'mp4', + 'fps': None, + 'protocol': 'm3u8_native', + 'preference': None, + 'quality': None, + 'width': 1920, + 'height': 1080, + 'vcodec': 'avc1.4d401f', + 'acodec': 'mp4a.40.2', + 'video_ext': 'mp4', + 'audio_ext': 'none', + }], + { + 'en': [{ + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }, { + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }], + 'fr': [{ + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }, { + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }], + 'es': [{ + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }, { + 'url': 
'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }], + 'ja': [{ + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }, { + 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8', + 'ext': 'vtt', + 'protocol': 'm3u8_native' + }], + } + ), + ] + + for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: + with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f: + formats, subs = self.ie._parse_m3u8_formats_and_subtitles( + f.read(), m3u8_url, ext='mp4') + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + expect_value(self, subs, expected_subs, None) + + def test_parse_mpd_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/13919 + # Also tests duplicate representation ids, see + # https://github.com/ytdl-org/youtube-dl/issues/15111 + 'float_duration', + 'http://unknown/manifest.mpd', # mpd_url + None, # mpd_base_url + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'm4a', + 'format_id': '318597', + 'format_note': 'DASH audio', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 61.587, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '318597', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 318.597, + 'width': 340, + 'height': 192, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '638590', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 638.59, + 'width': 512, + 'height': 288, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1022565', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 1022.565, + 'width': 688, + 'height': 384, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '2046506', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 2046.506, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '3998017', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640029', + 'tbr': 3998.017, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '5997485', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640032', + 'tbr': 5997.485, + 'width': 1920, + 'height': 1080, + }], + {}, + ), ( + # https://github.com/ytdl-org/youtube-dl/pull/14844 + 'urls_only', + 'http://unknown/manifest.mpd', # mpd_url + None, # mpd_base_url + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_144p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 200, + 'width': 256, + 'height': 144, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 
'h264_aac_240p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 400, + 'width': 424, + 'height': 240, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_360p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 800, + 'width': 640, + 'height': 360, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_480p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1200, + 'width': 856, + 'height': 480, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_576p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1600, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_720p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 2400, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_1080p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 4400, + 'width': 1920, + 'height': 1080, + }], + {}, + ), ( + # https://github.com/ytdl-org/youtube-dl/issues/20346 + # Media considered unfragmented even though it contains + # Initialization tag + 'unfragmented', + 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url + 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url + [{ + 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'm4a', + 'format_id': 'AUDIO-1', + 'format_note': 'DASH audio', + 'container': 'm4a_dash', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 129.87, + 'asr': 48000, + + }, { + 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'mp4', + 'format_id': 'VIDEO-2', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'acodec': 'none', + 'vcodec': 'avc1.4d401e', + 'tbr': 608.0, + 'width': 240, + 'height': 240, + 'fps': 30, + }, { + 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'mp4', + 'format_id': 'VIDEO-1', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'acodec': 'none', + 'vcodec': 'avc1.4d401e', + 'tbr': 804.261, + 'width': 360, + 'height': 360, + 'fps': 30, + }], + {}, + ), ( + 'subtitles', + 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/', + [{ + 'format_id': 'audio=128001', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'm4a', + 'tbr': 128.001, + 'asr': 48000, + 'format_note': 'DASH audio', + 'container': 'm4a_dash', + 'vcodec': 'none', + 'acodec': 'mp4a.40.2', + 'url': 
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + 'audio_ext': 'm4a', + 'video_ext': 'none', + 'abr': 128.001, + }, { + 'format_id': 'video=100000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 336, + 'height': 144, + 'tbr': 100, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 100, + }, { + 'format_id': 'video=326000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 562, + 'height': 240, + 'tbr': 326, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 326, + }, { + 'format_id': 'video=698000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 844, + 'height': 360, + 'tbr': 698, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 698, + }, { + 'format_id': 'video=1493000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 1126, + 'height': 480, + 'tbr': 1493, + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 1493, + }, { + 'format_id': 'video=4482000', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'ext': 'mp4', + 'width': 1688, + 'height': 720, + 'tbr': 4482, + 'format_note': 
'DASH video', + 'container': 'mp4_dash', + 'vcodec': 'avc1.4D401F', + 'acodec': 'none', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + 'video_ext': 'mp4', + 'audio_ext': 'none', + 'vbr': 4482, + }], + { + 'en': [ + { + 'ext': 'mp4', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', + 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', + 'protocol': 'http_dash_segments', + } + ] + }, + ) + ] + + for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: + with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f: + formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( + compat_etree_fromstring(f.read().encode()), + mpd_base_url=mpd_base_url, mpd_url=mpd_url) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + expect_value(self, subtitles, expected_subtitles, None) + + def test_parse_ism_formats(self): + _TEST_CASES = [ + ( + 'sintel', + 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + [{ + 'format_id': 'audio-128', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'ext': 'isma', + 'tbr': 128, + 'asr': 48000, + 'vcodec': 'none', + 'acodec': 'AACL', + 'protocol': 'ism', + 'audio_channels': 2, + '_download_params': { + 'stream_type': 'audio', + 'duration': 8880746666, + 'timescale': 10000000, + 'width': 0, + 'height': 0, + 'fourcc': 'AACL', + 'codec_private_data': '1190', + 'sampling_rate': 48000, + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video-100', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'ext': 'ismv', + 'width': 336, + 'height': 144, + 'tbr': 100, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + '_download_params': { + 'stream_type': 'video', + 'duration': 8880746666, + 'timescale': 10000000, + 'width': 336, + 'height': 144, + 'fourcc': 'AVC1', + 'codec_private_data': '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video-326', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'ext': 'ismv', + 'width': 562, + 'height': 240, + 'tbr': 326, + 'vcodec': 'AVC1', + 
'acodec': 'none', + 'protocol': 'ism', + '_download_params': { + 'stream_type': 'video', + 'duration': 8880746666, + 'timescale': 10000000, + 'width': 562, + 'height': 240, + 'fourcc': 'AVC1', + 'codec_private_data': '00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video-698', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'ext': 'ismv', + 'width': 844, + 'height': 360, + 'tbr': 698, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + '_download_params': { + 'stream_type': 'video', + 'duration': 8880746666, + 'timescale': 10000000, + 'width': 844, + 'height': 360, + 'fourcc': 'AVC1', + 'codec_private_data': '00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video-1493', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'ext': 'ismv', + 'width': 1126, + 'height': 480, + 'tbr': 1493, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + '_download_params': { + 'stream_type': 'video', + 'duration': 8880746666, + 'timescale': 10000000, + 'width': 1126, + 'height': 480, + 'fourcc': 'AVC1', + 'codec_private_data': '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video-4482', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'ext': 'ismv', + 'width': 1688, + 'height': 720, + 'tbr': 4482, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + '_download_params': { + 'stream_type': 'video', + 'duration': 8880746666, + 'timescale': 10000000, + 'width': 1688, + 'height': 720, + 'fourcc': 'AVC1', + 'codec_private_data': '00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }], + { + 'eng': [ + { + 'ext': 'ismt', + 'protocol': 'ism', + 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', + '_download_params': { + 'stream_type': 'text', + 'duration': 8880746666, + 'timescale': 10000000, + 'fourcc': 'TTML', + 'codec_private_data': '' + } + } + ] + }, + ), + ( + 'ec-3_test', + 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + [{ + 'format_id': 'audio_deu-127', + 'url': 
'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'isma', + 'tbr': 127, + 'asr': 48000, + 'vcodec': 'none', + 'acodec': 'AACL', + 'protocol': 'ism', + 'language': 'deu', + 'audio_channels': 2, + '_download_params': { + 'stream_type': 'audio', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 0, + 'height': 0, + 'fourcc': 'AACL', + 'language': 'deu', + 'codec_private_data': '1190', + 'sampling_rate': 48000, + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'audio_deu_1-224', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'isma', + 'tbr': 224, + 'asr': 48000, + 'vcodec': 'none', + 'acodec': 'EC-3', + 'protocol': 'ism', + 'language': 'deu', + 'audio_channels': 6, + '_download_params': { + 'stream_type': 'audio', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 0, + 'height': 0, + 'fourcc': 'EC-3', + 'language': 'deu', + 'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00', + 'sampling_rate': 48000, + 'channels': 6, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-23', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 384, + 'height': 216, + 'tbr': 23, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 384, + 'height': 216, + 'fourcc': 'AVC1', + 'language': 'deu', + 'codec_private_data': '000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-403', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 400, + 'height': 224, + 'tbr': 403, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 400, + 'height': 224, + 'fourcc': 'AVC1', + 'language': 'deu', + 
'codec_private_data': '00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-680', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 640, + 'height': 360, + 'tbr': 680, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 640, + 'height': 360, + 'fourcc': 'AVC1', + 'language': 'deu', + 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-1253', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 640, + 'height': 360, + 'tbr': 1253, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'vbr': 1253, + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 640, + 'height': 360, + 'fourcc': 'AVC1', + 'language': 'deu', + 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-2121', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 768, + 'height': 432, + 'tbr': 2121, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 768, + 'height': 432, + 'fourcc': 'AVC1', + 'language': 'deu', + 'codec_private_data': '00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-3275', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 1280, + 'height': 720, + 
'tbr': 3275, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 1280, + 'height': 720, + 'fourcc': 'AVC1', + 'language': 'deu', + 'codec_private_data': '00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-5300', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 1920, + 'height': 1080, + 'tbr': 5300, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 1920, + 'height': 1080, + 'fourcc': 'AVC1', + 'language': 'deu', + 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }, { + 'format_id': 'video_deu-8079', + 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', + 'ext': 'ismv', + 'width': 1920, + 'height': 1080, + 'tbr': 8079, + 'vcodec': 'AVC1', + 'acodec': 'none', + 'protocol': 'ism', + 'language': 'deu', + '_download_params': { + 'stream_type': 'video', + 'duration': 370000000, + 'timescale': 10000000, + 'width': 1920, + 'height': 1080, + 'fourcc': 'AVC1', + 'language': 'deu', + 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', + 'channels': 2, + 'bits_per_sample': 16, + 'nal_unit_length_field': 4 + }, + }], + {}, + ), + ] + + for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: + with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f: + formats, subtitles = self.ie._parse_ism_formats_and_subtitles( + compat_etree_fromstring(f.read().encode()), ism_url=ism_url) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + expect_value(self, subtitles, expected_subtitles, None) + + def test_parse_f4m_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/14660 + 'custom_base_url', + 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + [{ + 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + 'ext': 'flv', + 'format_id': '2148', + 'protocol': 'f4m', + 'tbr': 2148, + 'width': 1280, + 'height': 720, + }] + ), + ] + + for f4m_file, f4m_url, expected_formats in _TEST_CASES: + with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f: + formats = self.ie._parse_f4m_formats( + compat_etree_fromstring(f.read().encode()), + f4m_url, None) + 
self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + + def test_parse_xspf(self): + _TEST_CASES = [ + ( + 'foo_xspf', + 'https://example.org/src/foo_xspf.xspf', + [{ + 'id': 'foo_xspf', + 'title': 'Pandemonium', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 202.416, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/cd1/track%201.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Final Cartridge (Nichico Twelve Remix)', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 255.857, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Rebuilding Nightingale', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 287.915, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/track3.mp3', + }, { + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.com/track3.mp3', + }] + }] + ), + ] + + for xspf_file, xspf_url, expected_entries in _TEST_CASES: + with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f: + entries = self.ie._parse_xspf( + compat_etree_fromstring(f.read().encode()), + xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) + expect_value(self, entries, expected_entries, None) + for i in range(len(entries)): + expect_dict(self, entries[i], expected_entries[i]) + + def test_response_with_expected_status_returns_content(self): + # Checks for mitigations against the effects of + # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which + # manifest as `_download_webpage`, `_download_xml`, `_download_json`, + # or the underlying `_download_webpage_handle` returning no content + # when a response matches `expected_status`. 
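The comment above captures why `expected_status` needs dedicated coverage: Python's urllib raises `HTTPError` for any non-2xx status before the caller ever sees the body, so the download helpers must recover the payload themselves. A minimal standalone sketch of the stdlib behaviour being mitigated (illustrative, not part of the patch):

import urllib.error
import urllib.request

def fetch_body(url):
    # urllib raises for non-2xx responses, but the body is still
    # readable from the HTTPError object, which is itself file-like.
    try:
        with urllib.request.urlopen(url) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as err:
        return err.code, err.read()

Against the test server set up below, fetch_body('http://127.0.0.1:<port>/teapot') would return the 418 status together with the teapot body, which is exactly what `_download_webpage_handle(..., expected_status=418)` is asserted to surface.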
+ + httpd = http.server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + port = http_server_port(httpd) + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = True + server_thread.start() + + (content, urlh) = self.ie._download_webpage_handle( + 'http://127.0.0.1:%d/teapot' % port, None, + expected_status=TEAPOT_RESPONSE_STATUS) + self.assertEqual(content, TEAPOT_RESPONSE_BODY) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py new file mode 100644 index 0000000..6be47af --- /dev/null +++ b/test/test_YoutubeDL.py @@ -0,0 +1,1346 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import copy +import json + +from test.helper import FakeYDL, assertRegexpMatches, try_rm +from yt_dlp import YoutubeDL +from yt_dlp.compat import compat_os_name +from yt_dlp.extractor import YoutubeIE +from yt_dlp.extractor.common import InfoExtractor +from yt_dlp.postprocessor.common import PostProcessor +from yt_dlp.utils import ( + ExtractorError, + LazyList, + OnDemandPagedList, + int_or_none, + match_filter_func, +) +from yt_dlp.utils.traversal import traverse_obj + +TEST_URL = 'http://localhost/sample.mp4' + + +class YDL(FakeYDL): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.downloaded_info_dicts = [] + self.msgs = [] + + def process_info(self, info_dict): + self.downloaded_info_dicts.append(info_dict.copy()) + + def to_screen(self, msg, *args, **kwargs): + self.msgs.append(msg) + + def dl(self, *args, **kwargs): + assert False, 'Downloader must not be invoked for test_YoutubeDL' + + +def _make_result(formats, **kwargs): + res = { + 'formats': formats, + 'id': 'testid', + 'title': 'testttitle', + 'extractor': 'testex', + 'extractor_key': 'TestEx', + 'webpage_url': 'http://example.com/watch?v=shenanigans', + } + res.update(**kwargs) + return res + + +class TestFormatSelection(unittest.TestCase): + def test_prefer_free_formats(self): + # Same resolution => download webm + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 460, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 460, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'webm') + + # Different resolution => download best quality (mp4) + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 1080, 'url': TEST_URL}, + ] + info_dict['formats'] = formats + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + # No prefer_free_formats => prefer mp4 and webm + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [ + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 720, 'url': TEST_URL}, + {'ext': 'flv', 'height': 720, 'url': TEST_URL}, + ] + info_dict['formats'] = formats + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [ + {'ext': 'flv', 'height': 720, 'url': TEST_URL}, + {'ext': 
'webm', 'height': 720, 'url': TEST_URL}, + ] + info_dict['formats'] = formats + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'webm') + + def test_format_selection(self): + formats = [ + {'format_id': '35', 'ext': 'mp4', 'preference': 0, 'url': TEST_URL}, + {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL}, + {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, + {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, + {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + def test(inp, *expected, multi=False): + ydl = YDL({ + 'format': inp, + 'allow_multiple_video_streams': multi, + 'allow_multiple_audio_streams': multi, + }) + ydl.process_ie_result(info_dict.copy()) + downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts) + self.assertEqual(list(downloaded), list(expected)) + + test('20/47', '47') + test('20/71/worst', '35') + test(None, '2') + test('webm/mp4', '47') + test('3gp/40/mp4', '35') + test('example-with-dashes', 'example-with-dashes') + test('all', '2', '47', '45', 'example-with-dashes', '35') + test('mergeall', '2+47+45+example-with-dashes+35', multi=True) + # See: https://github.com/yt-dlp/yt-dlp/pull/8797 + test('7_a/worst', '35') + + def test_format_selection_audio(self): + formats = [ + {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-high') + + ydl = YDL({'format': 'worstaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-low') + + formats = [ + {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestaudio/worstaudio/best'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vid-high') + + def test_format_selection_audio_exts(self): + formats = [ + {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + ] + + info_dict = _make_result(formats) + ydl = YDL({'format': 'best'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'aac-64') + + ydl = YDL({'format': 'mp3'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] +
self.assertEqual(downloaded['format_id'], 'mp3-64') + + ydl = YDL({'prefer_free_formats': True}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'ogg-64') + + def test_format_selection_video(self): + formats = [ + {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestvideo'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'dash-video-high') + + ydl = YDL({'format': 'worstvideo'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'dash-video-low') + + ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'dash-video-low') + + formats = [ + {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') + + def test_format_selection_string_ops(self): + formats = [ + {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # equals (=) + ydl = YDL({'format': '[format_id=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not equal (!=) + ydl = YDL({'format': '[format_id!=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # starts with (^=) + ydl = YDL({'format': '[format_id^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not start with (!^=) + ydl = YDL({'format': '[format_id!^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # ends with ($=) + ydl = YDL({'format': '[format_id$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not end with (!$=) + ydl = YDL({'format': '[format_id!$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + 
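The string operators exercised here (`=`, `!=`, `^=`, `$=`, and `*=` just below) all belong to yt-dlp's bracketed format-filter syntax and can be chained. A hedged usage sketch with the public API (the URL and format IDs are hypothetical):

import yt_dlp

# Keep only formats whose format_id starts with 'hls' and does not end with '-drc'
opts = {'format': 'best[format_id^=hls][format_id!$=-drc]'}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://example.com/watch?v=xyz', download=False)

As the assertions above show, a chain of negated filters that excludes every available format makes the selection raise ExtractorError rather than silently download nothing.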
# contains (*=) + ydl = YDL({'format': '[format_id*=bc-cb]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=bc-cb]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + ydl = YDL({'format': '[format_id!*=-]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + def test_youtube_format_selection(self): + # FIXME: Rewrite in accordance with the new format sorting options + return + + order = [ + '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13', + # Apple HTTP Live Streaming + '96', '95', '94', '93', '92', '132', '151', + # 3D + '85', '84', '102', '83', '101', '82', '100', + # Dash video + '137', '248', '136', '247', '135', '246', + '245', '244', '134', '243', '133', '242', '160', + # Dash audio + '141', '172', '140', '171', '139', + ] + + def format_info(f_id): + info = YoutubeIE._formats[f_id].copy() + + # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec' + # and 'vcodec', while in tests such information is incomplete since + # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593 + # test_YoutubeDL.test_youtube_format_selection is broken without + # this fix + if 'acodec' in info and 'vcodec' not in info: + info['vcodec'] = 'none' + elif 'vcodec' in info and 'acodec' not in info: + info['acodec'] = 'none' + + info['format_id'] = f_id + info['url'] = 'url:' + f_id + return info + formats_order = [format_info(f_id) for f_id in order] + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': 'bestvideo+bestaudio'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '248+172') + self.assertEqual(downloaded['ext'], 'mp4') + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '38') + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': 'bestvideo/best,bestaudio'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['137', '141']) + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['137+141', '248+141']) + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['136+141', '247+141']) + + info_dict = _make_result(list(formats_order), 
extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['248+141']) + + for f1, f2 in zip(formats_order, formats_order[1:]): + info_dict = _make_result([f1, f2], extractor='youtube') + ydl = YDL({'format': 'best/bestvideo'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1['format_id']) + + info_dict = _make_result([f2, f1], extractor='youtube') + ydl = YDL({'format': 'best/bestvideo'}) + ydl.sort_formats(info_dict) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1['format_id']) + + def test_audio_only_extractor_format_selection(self): + # For extractors with incomplete formats (all formats are audio-only or + # video-only) best and worst should fall back to the corresponding best/worst + # video-only or audio-only formats (as per + # https://github.com/ytdl-org/youtube-dl/pull/5556) + formats = [ + {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'best'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'high') + + ydl = YDL({'format': 'worst'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'low') + + def test_format_not_available(self): + formats = [ + {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, + {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # This must fail since the complete video+audio format does not match the filter + # and the extractor does not provide incomplete-only formats (i.e. only + # video-only or audio-only).
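The comment above states the failure mode the next assertion verifies: a bracketed filter such as 'best[height>360]' only considers complete (video+audio) formats, and the only format above 360p in this list is video-only. The later test_format_selection_issue_10083 shows the conventional escape hatch, sketched here for contrast (spec strings only, values illustrative):

# Fails on this format list: no complete format is taller than 360p.
opts = {'format': 'best[height>360]'}
# Succeeds: the fallback merges a video-only format with the best audio.
opts = {'format': 'best[height>360]/bestvideo[height>360]+bestaudio'}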
+ ydl = YDL({'format': 'best[height>360]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + def test_format_selection_issue_10083(self): + # See https://github.com/ytdl-org/youtube-dl/issues/10083 + formats = [ + {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, + {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio') + + def test_invalid_format_specs(self): + def assert_syntax_error(format_spec): + self.assertRaises(SyntaxError, YDL, {'format': format_spec}) + + assert_syntax_error('bestvideo,,best') + assert_syntax_error('+bestaudio') + assert_syntax_error('bestvideo+') + assert_syntax_error('/') + assert_syntax_error('[720<height]') + + def test_format_filtering(self): + formats = [ + {'format_id': 'A', 'filesize': 500, 'width': 1000}, + {'format_id': 'B', 'filesize': 1000, 'width': 500}, + {'format_id': 'C', 'filesize': 1000, 'width': 400}, + {'format_id': 'D', 'filesize': 2000, 'width': 600}, + {'format_id': 'E', 'filesize': 3000}, + {'format_id': 'F'}, + {'format_id': 'G', 'filesize': 1000000}, + ] + for f in formats: + f['url'] = 'http://_/' + f['ext'] = 'unknown' + info_dict = _make_result(formats, _format_sort_fields=('id', )) + + ydl = YDL({'format': 'best[filesize<3000]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'D') + + ydl = YDL({'format': 'best[filesize<=3000]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'E') + + ydl = YDL({'format': 'best[filesize <= ? 
3000]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'F') + + ydl = YDL({'format': 'best [filesize = 1000] [width>450]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'B') + + ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'C') + + ydl = YDL({'format': '[filesize>?1]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'G') + + ydl = YDL({'format': '[filesize<1M]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'E') + + ydl = YDL({'format': '[filesize<1MiB]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'G') + + ydl = YDL({'format': 'all[width>=400][width<=600]'}) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['D', 'C', 'B']) + + ydl = YDL({'format': 'best[height<40]'}) + try: + ydl.process_ie_result(info_dict) + except ExtractorError: + pass + self.assertEqual(ydl.downloaded_info_dicts, []) + + def test_default_format_spec(self): + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') + + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo*+bestaudio/best') + + ydl = YDL({'outtmpl': '-'}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo*+bestaudio/best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') + + +class TestYoutubeDL(unittest.TestCase): + def test_subtitles(self): + def s_formats(lang, autocaption=False): + return [{ + 'ext': ext, + 'url': f'http://localhost/video.{lang}.{ext}', + '_auto': autocaption, + } for ext in ['vtt', 'srt', 'ass']] + subtitles = {l: s_formats(l) for l in ['en', 'fr', 'es']} + auto_captions = {l: s_formats(l, True) for l in ['it', 'pt', 'es']} + info_dict = { + 'id': 'test', + 'title': 'Test', + 'url': 'http://localhost/video.mp4', + 'subtitles': subtitles, + 'automatic_captions': auto_captions, + 'extractor': 'TEST', + 'webpage_url': 'http://example.com/watch?v=shenanigans', + } + + def get_info(params={}): + params.setdefault('simulate', True) + ydl = YDL(params) + ydl.report_warning = lambda *args, **kargs: None + return ydl.process_video_result(info_dict, download=False) + + result = get_info() + self.assertFalse(result.get('requested_subtitles')) + self.assertEqual(result['subtitles'], subtitles) + self.assertEqual(result['automatic_captions'], auto_captions) + + result = get_info({'writesubtitles': True}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'en'}) + self.assertTrue(subs['en'].get('data') is None) + self.assertEqual(subs['en']['ext'], 'ass') + + result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'}) + subs = result['requested_subtitles'] + self.assertEqual(subs['en']['ext'], 'srt') 
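The parameters driving these subtitle cases correspond one-to-one with CLI flags, which can make the combinations below easier to follow. A rough mapping (values are illustrative, not taken from the tests):

ydl_opts = {
    'writesubtitles': True,             # --write-subs
    'writeautomaticsub': True,          # --write-auto-subs
    'subtitleslangs': ['es', 'fr'],     # --sub-langs "es,fr"
    'subtitlesformat': 'srt/ass/best',  # --sub-format: first listed ext that exists wins
}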
+ + result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'es', 'fr'}) + + result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'es', 'fr'}) + + result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'fr'}) + + result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'en'}) + + result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'es', 'en'}) + + result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'es', 'pt'}) + self.assertFalse(subs['es']['_auto']) + self.assertTrue(subs['pt']['_auto']) + + result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), {'es', 'pt'}) + self.assertTrue(subs['es']['_auto']) + self.assertTrue(subs['pt']['_auto']) + + def test_add_extra_info(self): + test_dict = { + 'extractor': 'Foo', + } + extra_info = { + 'extractor': 'Bar', + 'playlist': 'funny videos', + } + YDL.add_extra_info(test_dict, extra_info) + self.assertEqual(test_dict['extractor'], 'Foo') + self.assertEqual(test_dict['playlist'], 'funny videos') + + outtmpl_info = { + 'id': '1234', + 'ext': 'mp4', + 'width': None, + 'height': 1080, + 'filesize': 1024, + 'title1': '$PATH', + 'title2': '%PATH%', + 'title3': 'foo/bar\\test', + 'title4': 'foo "bar" test', + 'title5': 'áéí 𝐀', + 'timestamp': 1618488000, + 'duration': 100000, + 'playlist_index': 1, + 'playlist_autonumber': 2, + '__last_playlist_index': 100, + 'n_entries': 10, + 'formats': [ + {'id': 'id 1', 'height': 1080, 'width': 1920}, + {'id': 'id 2', 'height': 720}, + {'id': 'id 3'} + ] + } + + def test_prepare_outtmpl_and_filename(self): + def test(tmpl, expected, *, info=None, **params): + params['outtmpl'] = tmpl + ydl = FakeYDL(params) + ydl._num_downloads = 1 + self.assertEqual(ydl.validate_outtmpl(tmpl), None) + + out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) + fname = ydl.prepare_filename(info or self.outtmpl_info) + + if not isinstance(expected, (list, tuple)): + expected = (expected, expected) + for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected): + if callable(expect): + self.assertTrue(expect(got), f'Wrong {name} from {tmpl}') + elif expect is not None: + self.assertEqual(got, expect, f'Wrong {name} from {tmpl}') + + # Side-effects + original_infodict = dict(self.outtmpl_info) + test('foo.bar', 'foo.bar') + original_infodict['epoch'] = self.outtmpl_info.get('epoch') + self.assertTrue(isinstance(original_infodict['epoch'], int)) + test('%(epoch)d', int_or_none) + self.assertEqual(original_infodict, self.outtmpl_info) + + # Auto-generated fields + test('%(id)s.%(ext)s', '1234.mp4') + test('%(duration_string)s', ('27:46:40', '27-46-40')) + test('%(resolution)s', '1080p') + test('%(playlist_index|)s', '001') + 
test('%(playlist_index&{}!)s', '1!') + test('%(playlist_autonumber)s', '02') + test('%(autonumber)s', '00001') + test('%(autonumber+2)03d', '005', autonumber_start=3) + test('%(autonumber)s', '001', autonumber_size=3) + + # Escaping % + test('%', '%') + test('%%', '%') + test('%%%%', '%%') + test('%s', '%s') + test('%%%s', '%%s') + test('%d', '%d') + test('%abc%', '%abc%') + test('%%(width)06d.%(ext)s', '%(width)06d.mp4') + test('%%%(height)s', '%1080') + test('%(width)06d.%(ext)s', 'NA.mp4') + test('%(width)06d.%%(ext)s', 'NA.%(ext)s') + test('%%(width)06d.%(ext)s', '%(width)06d.mp4') + + # ID sanitization + test('%(id)s', '_abcd', info={'id': '_abcd'}) + test('%(some_id)s', '_abcd', info={'some_id': '_abcd'}) + test('%(formats.0.id)s', '_abcd', info={'formats': [{'id': '_abcd'}]}) + test('%(id)s', '-abcd', info={'id': '-abcd'}) + test('%(id)s', '.abcd', info={'id': '.abcd'}) + test('%(id)s', 'ab__cd', info={'id': 'ab__cd'}) + test('%(id)s', ('ab:cd', 'ab：cd'), info={'id': 'ab:cd'}) + test('%(id.0)s', '-', info={'id': '--'}) + + # Invalid templates + self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError)) + test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none') + test('%(..)s', 'NA') + test('%(formats.{id)s', 'NA') + + # Entire info_dict + def expect_same_infodict(out): + got_dict = json.loads(out) + for info_field, expected in self.outtmpl_info.items(): + self.assertEqual(got_dict.get(info_field), expected, info_field) + return True + + test('%()j', (expect_same_infodict, None)) + + # NA placeholder + NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' + test(NA_TEST_OUTTMPL, 'NA-NA-def-1234.mp4') + test(NA_TEST_OUTTMPL, 'none-none-def-1234.mp4', outtmpl_na_placeholder='none') + test(NA_TEST_OUTTMPL, '--def-1234.mp4', outtmpl_na_placeholder='') + test('%(non_existent.0)s', 'NA') + + # String formatting + FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s' + test(FMT_TEST_OUTTMPL % 's', '1080.mp4') + test(FMT_TEST_OUTTMPL % 'd', '1080.mp4') + test(FMT_TEST_OUTTMPL % '6d', ' 1080.mp4') + test(FMT_TEST_OUTTMPL % '-6d', '1080 .mp4') + test(FMT_TEST_OUTTMPL % '06d', '001080.mp4') + test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4') + test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4') + test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4') + test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4') + test(FMT_TEST_OUTTMPL % ' 0 6d', ' 01080.mp4') + + # Type casting + test('%(id)d', '1234') + test('%(height)c', '1') + test('%(ext)c', 'm') + test('%(id)d %(id)r', "1234 '1234'") + test('%(id)r %(height)r', "'1234' 1080") + test('%(title5)a %(height)a', (R"'\xe1\xe9\xed \U0001d400' 1080", None)) + test('%(ext)s-%(ext|def)d', 'mp4-def') + test('%(width|0)04d', '0') + test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none') + + FORMATS = self.outtmpl_info['formats'] + + # Custom type casting + test('%(formats.:.id)l', 'id 1, id 2, id 3') + test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3')) + test('%(ext)l', 'mp4') + test('%(formats.:.id) 18l', ' id 1, id 2, id 3') + test('%(formats)j', (json.dumps(FORMATS), None)) + test('%(formats)#j', ( + json.dumps(FORMATS, indent=4), + json.dumps(FORMATS, indent=4).replace(':', '：').replace('"', '＂').replace('\n', ' ') + )) + test('%(title5).3B', 'á') + test('%(title5)U', 'áéí 𝐀') + test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀') + test('%(title5)+U', 'áéí A') + test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A') + test('%(height)D', '1k') + test('%(filesize)#D', '1Ki') + test('%(height)5.2D', ' 1.08k') + test('%(title4)#S',
'foo_bar_test') + test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) + if compat_os_name == 'nt': + test('%(title4)q', ('"foo ""bar"" test"', None)) + test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) + test('%(formats.0.id)#q', ('"id 1"', None)) + else: + test('%(title4)q', ('\'foo "bar" test\'', '\'foo ＂bar＂ test\'')) + test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'") + test('%(formats.0.id)#q', "'id 1'") + + # Internal formatting + test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') + test('%(title|%)s %(title|%%)s', '% %%') + test('%(id+1-height+3)05d', '00158') + test('%(width+100)05d', 'NA') + test('%(filesize*8)d', '8192') + test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None)) + test('%(formats.0)r', (repr(FORMATS[0]), None)) + test('%(height.0)03d', '001') + test('%(-height.0)04d', '-001') + test('%(formats.-1.id)s', FORMATS[-1]['id']) + test('%(formats.0.id.-1)d', FORMATS[0]['id'][-1]) + test('%(formats.3)s', 'NA') + test('%(formats.:2:-1)r', repr(FORMATS[:2:-1])) + test('%(formats.0.id.-1+id)f', '1235.000000') + test('%(formats.0.id.-1+formats.1.id.-1)d', '3') + out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]} + if 'height' in f else {'id': f['id']} + for f in FORMATS]) + test('%(formats.:.{id,height.:2})j', (out, None)) + test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS)) + test('%(.{id,title})j', ('{"id": "1234"}', '{＂id＂： ＂1234＂}')) + + # Alternates + test('%(title,id)s', '1234') + test('%(width-100,height+20|def)d', '1100') + test('%(width-100,height+width|def)s', 'def') + test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00') + + # Replacement + test('%(id&foo)s.bar', 'foo.bar') + test('%(title&foo)s.bar', 'NA.bar') + test('%(title&foo|baz)s.bar', 'baz.bar') + test('%(x,id&foo|baz)s.bar', 'foo.bar') + test('%(x,title&foo|baz)s.bar', 'baz.bar') + test('%(id&a\nb|)s', ('a\nb', 'a b')) + test('%(id&hi {:>10} {}|)s', 'hi 1234 1234') + test(R'%(id&{0} {}|)s', 'NA') + test(R'%(id&{0.1}|)s', 'NA') + test('%(height&{:,d})S', '1,080') + + # Laziness + def gen(): + yield from range(5) + raise self.assertTrue(False, 'LazyList should not be evaluated till here') + test('%(key.4)s', '4', info={'key': LazyList(gen())}) + + # Empty filename + test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4') + # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme + # test('%(foo|)s', ('', '_')) # fixme + + # Environment variable expansion for prepare_filename + os.environ['__yt_dlp_var'] = 'expanded' + envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' + test(envvar, (envvar, 'expanded')) + if compat_os_name == 'nt': + test('%s%', ('%s%', '%s%')) + os.environ['s'] = 'expanded' + test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s + os.environ['(test)s'] = 'expanded' + test('%(test)s%', ('NA%', 'expanded')) # Environment should take priority over template + + # Path expansion and escaping + test('Hello %(title1)s', 'Hello $PATH') + test('Hello %(title2)s', 'Hello %PATH%') + test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test')) + test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo⧸bar⧹test' % os.path.sep)) + + def test_format_note(self): + ydl = YoutubeDL() + self.assertEqual(ydl._format_note({}), '') + assertRegexpMatches(self, ydl._format_note({ + 'vbr': 10, + }), r'^\s*10k$') + assertRegexpMatches(self, ydl._format_note({ + 'fps': 30, + }), r'^30fps$') + + def test_postprocessors(self): + filename = 'post-processor-testfile.mp4' + audiofile =
filename + '.mp3' + + class SimplePP(PostProcessor): + def run(self, info): + with open(audiofile, 'w') as f: + f.write('EXAMPLE') + return [info['filepath']], info + + def run_pp(params, PP): + with open(filename, 'w') as f: + f.write('EXAMPLE') + ydl = YoutubeDL(params) + ydl.add_post_processor(PP()) + ydl.post_process(filename, {'filepath': filename}) + + run_pp({'keepvideo': True}, SimplePP) + self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + os.unlink(filename) + os.unlink(audiofile) + + run_pp({'keepvideo': False}, SimplePP) + self.assertFalse(os.path.exists(filename), '%s exists' % filename) + self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + os.unlink(audiofile) + + class ModifierPP(PostProcessor): + def run(self, info): + with open(info['filepath'], 'w') as f: + f.write('MODIFIED') + return [], info + + run_pp({'keepvideo': False}, ModifierPP) + self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + os.unlink(filename) + + def test_match_filter(self): + first = { + 'id': '1', + 'url': TEST_URL, + 'title': 'one', + 'extractor': 'TEST', + 'duration': 30, + 'filesize': 10 * 1024, + 'playlist_id': '42', + 'uploader': "變態妍字幕版 太妍 тест", + 'creator': "тест ' 123 ' тест--", + 'webpage_url': 'http://example.com/watch?v=shenanigans', + } + second = { + 'id': '2', + 'url': TEST_URL, + 'title': 'two', + 'extractor': 'TEST', + 'duration': 10, + 'description': 'foo', + 'filesize': 5 * 1024, + 'playlist_id': '43', + 'uploader': "тест 123", + 'webpage_url': 'http://example.com/watch?v=SHENANIGANS', + } + videos = [first, second] + + def get_videos(filter_=None): + ydl = YDL({'match_filter': filter_, 'simulate': True}) + for v in videos: + ydl.process_ie_result(v.copy(), download=True) + return [v['id'] for v in ydl.downloaded_info_dicts] + + res = get_videos() + self.assertEqual(res, ['1', '2']) + + def f(v, incomplete): + if v['id'] == '1': + return None + else: + return 'Video id is not 1' + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('duration < 30') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description = foo') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description =? 
foo') + res = get_videos(f) + self.assertEqual(res, ['1', '2']) + + f = match_filter_func('filesize > 5KiB') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('playlist_id = 42') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('creator = "тест \' 123 \' тест--"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func("creator = 'тест \\' 123 \\' тест--'") + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30") + res = get_videos(f) + self.assertEqual(res, []) + + def test_playlist_items_selection(self): + INDICES, PAGE_SIZE = list(range(1, 11)), 3 + + def entry(i, evaluated): + evaluated.append(i) + return { + 'id': str(i), + 'title': str(i), + 'url': TEST_URL, + } + + def pagedlist_entries(evaluated): + def page_func(n): + start = PAGE_SIZE * n + for i in INDICES[start: start + PAGE_SIZE]: + yield entry(i, evaluated) + return OnDemandPagedList(page_func, PAGE_SIZE) + + def page_num(i): + return (i + PAGE_SIZE - 1) // PAGE_SIZE + + def generator_entries(evaluated): + for i in INDICES: + yield entry(i, evaluated) + + def list_entries(evaluated): + return list(generator_entries(evaluated)) + + def lazylist_entries(evaluated): + return LazyList(generator_entries(evaluated)) + + def get_downloaded_info_dicts(params, entries): + ydl = YDL(params) + ydl.process_ie_result({ + '_type': 'playlist', + 'id': 'test', + 'extractor': 'test:playlist', + 'extractor_key': 'test:playlist', + 'webpage_url': 'http://example.com', + 'entries': entries, + }) + return ydl.downloaded_info_dicts + + def test_selection(params, expected_ids, evaluate_all=False): + expected_ids = list(expected_ids) + if evaluate_all: + generator_eval = pagedlist_eval = INDICES + elif not expected_ids: + generator_eval = pagedlist_eval = [] + else: + generator_eval = INDICES[0: max(expected_ids)] + pagedlist_eval = INDICES[PAGE_SIZE * page_num(min(expected_ids)) - PAGE_SIZE: + PAGE_SIZE * page_num(max(expected_ids))] + + for name, func, expected_eval in ( + ('list', list_entries, INDICES), + ('Generator', generator_entries, generator_eval), + # ('LazyList', lazylist_entries, generator_eval), # Generator and LazyList follow the exact same code path + ('PagedList', pagedlist_entries, pagedlist_eval), + ): + evaluated = [] + entries = func(evaluated) + results = [(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index'])) + for v in get_downloaded_info_dicts(params, entries)] + self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))), f'Entries of {name} for {params}') + self.assertEqual(sorted(evaluated), expected_eval, f'Evaluation of {name} for {params}') + + test_selection({}, INDICES) + test_selection({'playlistend': 20}, INDICES, True) + test_selection({'playlistend': 2}, INDICES[:2]) + test_selection({'playliststart': 11}, [], True) + test_selection({'playliststart': 2}, INDICES[1:]) + test_selection({'playlist_items': '2-4'}, INDICES[1:4]) + test_selection({'playlist_items': '2,4'}, [2, 4]) + test_selection({'playlist_items': '20'}, [], True) + test_selection({'playlist_items': '0'}, []) + + # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591 + test_selection({'playlist_items': '2-4,3-4,3'}, [2, 3, 4]) + 
test_selection({'playlist_items': '4,2'}, [4, 2]) + + # Tests for https://github.com/yt-dlp/yt-dlp/issues/720 + # https://github.com/yt-dlp/yt-dlp/issues/302 + test_selection({'playlistreverse': True}, INDICES[::-1]) + test_selection({'playliststart': 2, 'playlistreverse': True}, INDICES[:0:-1]) + test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2]) + test_selection({'playlist_items': '4,2'}, [4, 2]) + + # Tests for --playlist-items start:end:step + test_selection({'playlist_items': ':'}, INDICES, True) + test_selection({'playlist_items': '::1'}, INDICES, True) + test_selection({'playlist_items': '::-1'}, INDICES[::-1], True) + test_selection({'playlist_items': ':6'}, INDICES[:6]) + test_selection({'playlist_items': ':-6'}, INDICES[:-5], True) + test_selection({'playlist_items': '-1:6:-2'}, INDICES[:4:-2], True) + test_selection({'playlist_items': '9:-6:-2'}, INDICES[8:3:-2], True) + + test_selection({'playlist_items': '1:inf:2'}, INDICES[::2], True) + test_selection({'playlist_items': '-2:inf'}, INDICES[-2:], True) + test_selection({'playlist_items': ':inf:-1'}, [], True) + test_selection({'playlist_items': '0-2:2'}, [2]) + test_selection({'playlist_items': '1-:2'}, INDICES[::2], True) + test_selection({'playlist_items': '0--2:2'}, INDICES[1:-1:2], True) + + test_selection({'playlist_items': '10::3'}, [10], True) + test_selection({'playlist_items': '-1::3'}, [10], True) + test_selection({'playlist_items': '11::3'}, [], True) + test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True) + test_selection({'playlist_items': '-15::15'}, [], True) + + def test_do_not_override_ie_key_in_url_transparent(self): + ydl = YDL() + + class Foo1IE(InfoExtractor): + _VALID_URL = r'foo1:' + + def _real_extract(self, url): + return { + '_type': 'url_transparent', + 'url': 'foo2:', + 'ie_key': 'Foo2', + 'title': 'foo1 title', + 'id': 'foo1_id', + } + + class Foo2IE(InfoExtractor): + _VALID_URL = r'foo2:' + + def _real_extract(self, url): + return { + '_type': 'url', + 'url': 'foo3:', + 'ie_key': 'Foo3', + } + + class Foo3IE(InfoExtractor): + _VALID_URL = r'foo3:' + + def _real_extract(self, url): + return _make_result([{'url': TEST_URL}], title='foo3 title') + + ydl.add_info_extractor(Foo1IE(ydl)) + ydl.add_info_extractor(Foo2IE(ydl)) + ydl.add_info_extractor(Foo3IE(ydl)) + ydl.extract_info('foo1:') + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['url'], TEST_URL) + self.assertEqual(downloaded['title'], 'foo1 title') + self.assertEqual(downloaded['id'], 'testid') + self.assertEqual(downloaded['extractor'], 'testex') + self.assertEqual(downloaded['extractor_key'], 'TestEx') + + # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 + def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): + + class _YDL(YDL): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def trouble(self, s, tb=None): + pass + + ydl = _YDL({ + 'format': 'extra', + 'ignoreerrors': True, + }) + + class VideoIE(InfoExtractor): + _VALID_URL = r'video:(?P<id>\d+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + formats = [{ + 'format_id': 'default', + 'url': 'url:', + }] + if video_id == '0': + raise ExtractorError('foo') + if video_id == '2': + formats.append({ + 'format_id': 'extra', + 'url': TEST_URL, + }) + return { + 'id': video_id, + 'title': 'Video %s' % video_id, + 'formats': formats, + } + + class PlaylistIE(InfoExtractor): + _VALID_URL = r'playlist:' + + def _entries(self): + for n in 
range(3): + video_id = str(n) + yield { + '_type': 'url_transparent', + 'ie_key': VideoIE.ie_key(), + 'id': video_id, + 'url': 'video:%s' % video_id, + 'title': 'Video Transparent %s' % video_id, + } + + def _real_extract(self, url): + return self.playlist_result(self._entries()) + + ydl.add_info_extractor(VideoIE(ydl)) + ydl.add_info_extractor(PlaylistIE(ydl)) + info = ydl.extract_info('playlist:') + entries = info['entries'] + self.assertEqual(len(entries), 3) + self.assertTrue(entries[0] is None) + self.assertTrue(entries[1] is None) + self.assertEqual(len(ydl.downloaded_info_dicts), 1) + downloaded = ydl.downloaded_info_dicts[0] + entries[2].pop('requested_downloads', None) + self.assertEqual(entries[2], downloaded) + self.assertEqual(downloaded['url'], TEST_URL) + self.assertEqual(downloaded['title'], 'Video Transparent 2') + self.assertEqual(downloaded['id'], '2') + self.assertEqual(downloaded['extractor'], 'Video') + self.assertEqual(downloaded['extractor_key'], 'Video') + + def test_header_cookies(self): + from http.cookiejar import Cookie + + ydl = FakeYDL() + ydl.report_warning = lambda *_, **__: None + + def cookie(name, value, version=None, domain='', path='', secure=False, expires=None): + return Cookie( + version or 0, name, value, None, False, + domain, bool(domain), bool(domain), path, bool(path), + secure, expires, False, None, None, rest={}) + + _test_url = 'https://yt.dlp/test' + + def test(encoded_cookies, cookies, *, headers=False, round_trip=None, error_re=None): + def _test(): + ydl.cookiejar.clear() + ydl._load_cookies(encoded_cookies, autoscope=headers) + if headers: + ydl._apply_header_cookies(_test_url) + data = {'url': _test_url} + ydl._calc_headers(data) + self.assertCountEqual( + map(vars, ydl.cookiejar), map(vars, cookies), + 'Extracted cookiejar.Cookie is not the same') + if not headers: + self.assertEqual( + data.get('cookies'), round_trip or encoded_cookies, + 'Cookie is not the same as round trip') + ydl.__dict__['_YoutubeDL__header_cookies'] = [] + + with self.subTest(msg=encoded_cookies): + if not error_re: + _test() + return + with self.assertRaisesRegex(Exception, error_re): + _test() + + test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')]) + test('test=value', [cookie('test', 'value')], error_re=r'Unscoped cookies are not allowed') + test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [ + cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'), + cookie('cookie2', 'value2', domain='.yt.dlp', path='/')]) + test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [ + cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)]) + test('test="value; "; path=/test; domain=.yt.dlp', [ + cookie('test', 'value; ', domain='.yt.dlp', path='/test')], + round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test') + test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')], + round_trip='name=""; Domain=.yt.dlp') + + test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True) + test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error_re=r'Invalid syntax') + ydl.deprecated_feature = ydl.report_error + test('test=value', [], headers=True, error_re=r'Passing cookies as a header is a potential security risk') + + def test_infojson_cookies(self): + TEST_FILE = 'test_infojson_cookies.info.json' + TEST_URL = 'https://example.com/example.mp4' + COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com' 
+ COOKIE_HEADER = {'Cookie': 'a=b; c=d'} + + ydl = FakeYDL() + ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE) + + def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False): + fmt = {'url': TEST_URL} + if fmts_header_cookies: + fmt['http_headers'] = COOKIE_HEADER + if cookies_field: + fmt['cookies'] = COOKIES + return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None) + + def test(initial_info, note): + result = {} + result['processed'] = ydl.process_ie_result(initial_info) + self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), + msg=f'No cookies set in cookiejar after initial process when {note}') + ydl.cookiejar.clear() + with open(TEST_FILE) as infojson: + result['loaded'] = ydl.sanitize_info(json.load(infojson), True) + result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False) + self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), + msg=f'No cookies set in cookiejar after final process when {note}') + ydl.cookiejar.clear() + for key in ('processed', 'loaded', 'final'): + info = result[key] + self.assertIsNone( + traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False), + msg=f'Cookie header not removed in {key} result when {note}') + self.assertEqual( + traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES, + msg=f'No cookies field found in {key} result when {note}') + + test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field') + test(make_info(info_header_cookies=True), 'info_dict header cookies') + test(make_info(fmts_header_cookies=True), 'format header cookies') + test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies') + test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields') + test(make_info(cookies_field=True), 'cookies format field') + test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only') + + try_rm(TEST_FILE) + + def test_add_headers_cookie(self): + def check_for_cookie_header(result): + return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False) + + ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}}) + ydl._apply_header_cookies(_make_result([])['webpage_url']) # Scope to input webpage URL: .example.com + + fmt = {'url': 'https://example.com/video.mp4'} + result = ydl.process_ie_result(_make_result([fmt]), download=False) + self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict') + self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field') + self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar') + + fmt = {'url': 'https://wrong.com/video.mp4'} + result = ydl.process_ie_result(_make_result([fmt]), download=False) + self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain') + self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain') + self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py new file mode 100644 index 0000000..fdb9bae --- /dev/null +++
b/test/test_YoutubeDLCookieJar.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import re +import tempfile + +from yt_dlp.cookies import YoutubeDLCookieJar + + +class TestYoutubeDLCookieJar(unittest.TestCase): + def test_keep_session_cookies(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') + cookiejar.load() + tf = tempfile.NamedTemporaryFile(delete=False) + try: + cookiejar.save(filename=tf.name) + temp = tf.read().decode() + self.assertTrue(re.search( + r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp)) + self.assertTrue(re.search( + r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp)) + finally: + tf.close() + os.remove(tf.name) + + def test_strip_httponly_prefix(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load() + + def assert_cookie_has_value(key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + + assert_cookie_has_value('HTTPONLY_COOKIE') + assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + + def test_malformed_cookies(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt') + cookiejar.load() + # Cookies should be empty since all malformed cookie file entries + # will be ignored + self.assertFalse(cookiejar._cookies) + + def test_get_cookie_header(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load() + header = cookiejar.get_cookie_header('https://www.foobar.foobar') + self.assertIn('HTTPONLY_COOKIE', header) + + def test_get_cookies_for_url(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') + cookiejar.load() + cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/') + self.assertEqual(len(cookies), 2) + cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/') + self.assertFalse(cookies) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_aes.py b/test/test_aes.py new file mode 100644 index 0000000..a26abfd --- /dev/null +++ b/test/test_aes.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import base64 + +from yt_dlp.aes import ( + aes_cbc_decrypt, + aes_cbc_decrypt_bytes, + aes_cbc_encrypt, + aes_ctr_decrypt, + aes_ctr_encrypt, + aes_decrypt, + aes_decrypt_text, + aes_ecb_decrypt, + aes_ecb_encrypt, + aes_encrypt, + aes_gcm_decrypt_and_verify, + aes_gcm_decrypt_and_verify_bytes, + key_expansion, + pad_block, +) +from yt_dlp.dependencies import Cryptodome +from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes + +# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py' + + +class TestAES(unittest.TestCase): + def setUp(self): + self.key = self.iv = [0x20, 0x15] + 14 * [0] + self.secret_msg = b'Secret message goes here' + + def test_encrypt(self): + msg = b'message' + key = list(range(16)) + encrypted = aes_encrypt(bytes_to_intlist(msg), key) + decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) + self.assertEqual(decrypted, msg) + + def test_cbc_decrypt(self): + data =
b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' + decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + if Cryptodome.AES: + decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_cbc_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd') + + def test_ctr_decrypt(self): + data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') + decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_ctr_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') + + def test_gcm_decrypt(self): + data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd' + authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e' + + decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( + bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + if Cryptodome.AES: + decrypted = aes_gcm_decrypt_and_verify_bytes( + data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_decrypt_text(self): + password = intlist_to_bytes(self.key).decode() + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + ).decode() + decrypted = (aes_decrypt_text(encrypted, password, 16)) + self.assertEqual(decrypted, self.secret_msg) + + password = intlist_to_bytes(self.key).decode() + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + ).decode() + decrypted = (aes_decrypt_text(encrypted, password, 32)) + self.assertEqual(decrypted, self.secret_msg) + + def test_ecb_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) + self.assertEqual( + encrypted, + b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + + def test_ecb_decrypt(self): + data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_key_expansion(self): + key = '4f6bdaa39e2f8cb07f5e722d9edef314' + + self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ + 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, + 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, + 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, + 0x2D, 0xAD, 0xDE, 0x47, 0x6C, 0x5A, 0xAF, 0x86, 0x9F, 0xBA, 0x00, 
0x72, 0x40, 0x93, 0x82, 0xA7, + 0xF9, 0xBE, 0x82, 0x4E, 0x95, 0xE4, 0x2D, 0xC8, 0x0A, 0x5E, 0x2D, 0xBA, 0x4A, 0xCD, 0xAF, 0x1D, + 0x54, 0xC7, 0x26, 0x98, 0xC1, 0x23, 0x0B, 0x50, 0xCB, 0x7D, 0x26, 0xEA, 0x81, 0xB0, 0x89, 0xF7, + 0x93, 0x60, 0x4E, 0x94, 0x52, 0x43, 0x45, 0xC4, 0x99, 0x3E, 0x63, 0x2E, 0x18, 0x8E, 0xEA, 0xD9, + 0xCA, 0xE7, 0x7B, 0x39, 0x98, 0xA4, 0x3E, 0xFD, 0x01, 0x9A, 0x5D, 0xD3, 0x19, 0x14, 0xB7, 0x0A, + 0xB0, 0x4E, 0x1C, 0xED, 0x28, 0xEA, 0x22, 0x10, 0x29, 0x70, 0x7F, 0xC3, 0x30, 0x64, 0xC8, 0xC9, + 0xE8, 0xA6, 0xC1, 0xE9, 0xC0, 0x4C, 0xE3, 0xF9, 0xE9, 0x3C, 0x9C, 0x3A, 0xD9, 0x58, 0x54, 0xF3, + 0xB4, 0x86, 0xCC, 0xDC, 0x74, 0xCA, 0x2F, 0x25, 0x9D, 0xF6, 0xB3, 0x1F, 0x44, 0xAE, 0xE7, 0xEC]) + + def test_pad_block(self): + block = [0x21, 0xA0, 0x43, 0xFF] + + self.assertEqual(pad_block(block, 'pkcs7'), + block + [0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C]) + + self.assertEqual(pad_block(block, 'iso7816'), + block + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + + self.assertEqual(pad_block(block, 'whitespace'), + block + [0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]) + + self.assertEqual(pad_block(block, 'zero'), + block + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + + block = list(range(16)) + for mode in ('pkcs7', 'iso7816', 'whitespace', 'zero'): + self.assertEqual(pad_block(block, mode), block, mode) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py new file mode 100644 index 0000000..6810759 --- /dev/null +++ b/test/test_age_restriction.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import is_download_test, try_rm +from yt_dlp import YoutubeDL +from yt_dlp.utils import DownloadError + + +def _download_restricted(url, filename, age): + """ Returns true if the file has been downloaded """ + + params = { + 'age_limit': age, + 'skip_download': True, + 'writeinfojson': True, + 'outtmpl': '%(id)s.%(ext)s', + } + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + json_filename = os.path.splitext(filename)[0] + '.info.json' + try_rm(json_filename) + try: + ydl.download([url]) + except DownloadError: + pass + else: + return os.path.exists(json_filename) + finally: + try_rm(json_filename) + + +@is_download_test +class TestAgeRestriction(unittest.TestCase): + def _assert_restricted(self, url, filename, age, old_age=None): + self.assertTrue(_download_restricted(url, filename, old_age)) + self.assertFalse(_download_restricted(url, filename, age)) + + def test_youtube(self): + self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10) + + def test_youporn(self): + self._assert_restricted( + 'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/', + '16715086.mp4', 2, old_age=25) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_all_urls.py b/test/test_all_urls.py new file mode 100644 index 0000000..848c96f --- /dev/null +++ b/test/test_all_urls.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import collections + +from test.helper import gettestcases +from yt_dlp.extractor import FacebookIE, YoutubeIE, gen_extractors + + +class 
TestAllURLsMatching(unittest.TestCase): + def setUp(self): + self.ies = gen_extractors() + + def matching_ies(self, url): + return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] + + def assertMatch(self, url, ie_list): + self.assertEqual(self.matching_ies(url), ie_list) + + def test_youtube_playlist_matching(self): + assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) + assertTab = lambda url: self.assertMatch(url, ['youtube:tab']) + assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 + assertPlaylist('PL63F0C78739B09958') + assertTab('https://www.youtube.com/AsapSCIENCE') + assertTab('https://www.youtube.com/embedded') + assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 + self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) + # Top tracks + assertTab('https://www.youtube.com/playlist?list=MCUS.20142101') + + def test_youtube_matching(self): + self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) + self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668 + self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) + # self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid + self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) + + def test_youtube_channel_matching(self): + assertChannel = lambda url: self.assertMatch(url, ['youtube:tab']) + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') + + def test_youtube_user_matching(self): + self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab']) + + def test_youtube_feeds(self): + self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab']) + self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab']) + self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) + self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) + + def test_youtube_search_matching(self): + self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) + self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) + + def test_facebook_matching(self): + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) + + def test_no_duplicates(self): + ies = gen_extractors() + for tc in gettestcases(include_onlymatching=True): + url = tc['url'] + for ie in ies: + if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): + self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}') + else: + self.assertFalse( + ie.suitable(url), + f'{type(ie).__name__} should not match URL {url!r} . 
That URL belongs to {tc["name"]}.') + + def test_keywords(self): + self.assertMatch(':ytsubs', ['youtube:subscriptions']) + self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) + self.assertMatch(':ythistory', ['youtube:history']) + + def test_vimeo_matching(self): + self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo']) + self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) + + # https://github.com/ytdl-org/youtube-dl/issues/1930 + def test_soundcloud_not_matching_sets(self): + self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) + + def test_tumblr(self): + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) + + def test_pbs(self): + # https://github.com/ytdl-org/youtube-dl/issues/2350 + self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) + self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) + + def test_no_duplicated_ie_names(self): + name_accu = collections.defaultdict(list) + for ie in self.ies: + name_accu[ie.IE_NAME.lower()].append(type(ie).__name__) + for (ie_name, ie_list) in name_accu.items(): + self.assertEqual( + len(ie_list), 1, + f'Multiple extractors with the same IE_NAME "{ie_name}" ({", ".join(ie_list)})') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_cache.py b/test/test_cache.py new file mode 100644 index 0000000..ce1624b --- /dev/null +++ b/test/test_cache.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import shutil + +from test.helper import FakeYDL +from yt_dlp.cache import Cache + + +def _is_empty(d): + return not bool(os.listdir(d)) + + +def _mkdir(d): + if not os.path.exists(d): + os.mkdir(d) + + +class TestCache(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') + _mkdir(TESTDATA_DIR) + self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test') + self.tearDown() + + def tearDown(self): + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) + + def test_cache(self): + ydl = FakeYDL({ + 'cachedir': self.test_dir, + }) + c = Cache(ydl) + obj = {'x': 1, 'y': ['ä', '\\a', True]} + self.assertEqual(c.load('test_cache', 'k.'), None) + c.store('test_cache', 'k.', obj) + self.assertEqual(c.load('test_cache', 'k2'), None) + self.assertFalse(_is_empty(self.test_dir)) + self.assertEqual(c.load('test_cache', 'k.'), obj) + self.assertEqual(c.load('test_cache', 'y'), None) + self.assertEqual(c.load('test_cache2', 'k.'), None) + c.remove() + self.assertFalse(os.path.exists(self.test_dir)) + self.assertEqual(c.load('test_cache', 'k.'), None) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_compat.py b/test/test_compat.py new file mode 100644 index 0000000..71ca7f9 --- /dev/null +++ b/test/test_compat.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +# Allow direct execution 
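+# Putting the repository root first on sys.path (below) makes the in-tree
+# `yt_dlp` package importable when this file is run directly, e.g.
+# `python test/test_compat.py`, instead of any globally installed copy.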
+import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import struct + +from yt_dlp import compat +from yt_dlp.compat import urllib # isort: split +from yt_dlp.compat import ( + compat_etree_fromstring, + compat_expanduser, + compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, +) +from yt_dlp.compat.urllib.request import getproxies + + +class TestCompat(unittest.TestCase): + def test_compat_passthrough(self): + with self.assertWarns(DeprecationWarning): + compat.compat_basestring + + with self.assertWarns(DeprecationWarning): + compat.WINDOWS_VT_MODE + + self.assertEqual(urllib.request.getproxies, getproxies) + + with self.assertWarns(DeprecationWarning): + compat.compat_pycrypto_AES # Must not raise error + + def test_compat_expanduser(self): + old_home = os.environ.get('HOME') + test_str = R'C:\Documents and Settings\тест\Application Data' + try: + os.environ['HOME'] = test_str + self.assertEqual(compat_expanduser('~'), test_str) + finally: + os.environ['HOME'] = old_home or '' + + def test_compat_urllib_parse_unquote(self): + self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') + self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') + self.assertEqual(compat_urllib_parse_unquote(''), '') + self.assertEqual(compat_urllib_parse_unquote('%'), '%') + self.assertEqual(compat_urllib_parse_unquote('%%'), '%%') + self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%') + self.assertEqual(compat_urllib_parse_unquote('%2F'), '/') + self.assertEqual(compat_urllib_parse_unquote('%2f'), '/') + self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波') + self.assertEqual( + compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" /> +%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''), + '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" /> +%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''') + self.assertEqual( + compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''), + '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''') + + def test_compat_urllib_parse_unquote_plus(self): + self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def') + self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def') + + def test_compat_urllib_parse_urlencode(self): + self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') + + def test_compat_etree_fromstring(self): + xml = ''' + <root foo="bar" spam="中文"> + <normal>foo</normal> + <chinese>中文</chinese> + <foo><bar>spam</bar></foo> + </root> + ''' + doc = compat_etree_fromstring(xml.encode()) + self.assertTrue(isinstance(doc.attrib['foo'], str)) + 
self.assertTrue(isinstance(doc.attrib['spam'], str))
+        self.assertTrue(isinstance(doc.find('normal').text, str))
+        self.assertTrue(isinstance(doc.find('chinese').text, str))
+        self.assertTrue(isinstance(doc.find('foo/bar').text, str))
+
+    def test_compat_etree_fromstring_doctype(self):
+        xml = '''<?xml version="1.0"?>
+<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
+<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
+        compat_etree_fromstring(xml)
+
+    def test_struct_unpack(self):
+        self.assertEqual(struct.unpack('!B', b'\x00'), (0,))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_config.py b/test/test_config.py
new file mode 100644
index 0000000..a393b65
--- /dev/null
+++ b/test/test_config.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+import unittest.mock
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import contextlib
+import itertools
+from pathlib import Path
+
+from yt_dlp.compat import compat_expanduser
+from yt_dlp.options import create_parser, parseOpts
+from yt_dlp.utils import Config, get_executable_path
+
+ENVIRON_DEFAULTS = {
+    'HOME': None,
+    'XDG_CONFIG_HOME': '/_xdg_config_home/',
+    'USERPROFILE': 'C:/Users/testing/',
+    'APPDATA': 'C:/Users/testing/AppData/Roaming/',
+    'HOMEDRIVE': 'C:/',
+    'HOMEPATH': 'Users/testing/',
+}
+
+
+@contextlib.contextmanager
+def set_environ(**kwargs):
+    saved_environ = os.environ.copy()
+
+    for name, value in {**ENVIRON_DEFAULTS, **kwargs}.items():
+        if value is None:
+            os.environ.pop(name, None)
+        else:
+            os.environ[name] = value
+
+    yield
+
+    os.environ.clear()
+    os.environ.update(saved_environ)
+
+
+def _generate_expected_groups():
+    xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
+    appdata_dir = os.getenv('appdata')
+    home_dir = compat_expanduser('~')
+    return {
+        'Portable': [
+            Path(get_executable_path(), 'yt-dlp.conf'),
+        ],
+        'Home': [
+            Path('yt-dlp.conf'),
+        ],
+        'User': [
+            Path(xdg_config_home, 'yt-dlp.conf'),
+            Path(xdg_config_home, 'yt-dlp', 'config'),
+            Path(xdg_config_home, 'yt-dlp', 'config.txt'),
+            *((
+                Path(appdata_dir, 'yt-dlp.conf'),
+                Path(appdata_dir, 'yt-dlp', 'config'),
+                Path(appdata_dir, 'yt-dlp', 'config.txt'),
+            ) if appdata_dir else ()),
+            Path(home_dir, 'yt-dlp.conf'),
+            Path(home_dir, 'yt-dlp.conf.txt'),
+            Path(home_dir, '.yt-dlp', 'config'),
+            Path(home_dir, '.yt-dlp', 'config.txt'),
+        ],
+        'System': [
+            Path('/etc/yt-dlp.conf'),
+            Path('/etc/yt-dlp/config'),
+            Path('/etc/yt-dlp/config.txt'),
+        ]
+    }
+
+
+class TestConfig(unittest.TestCase):
+    maxDiff = None
+
+    @set_environ()
+    def test_config__ENVIRON_DEFAULTS_sanity(self):
+        expected = make_expected()
+        self.assertCountEqual(
+            set(expected), expected,
+            'ENVIRON_DEFAULTS produces non-unique names')
+
+    def test_config_all_environ_values(self):
+        for name, value in ENVIRON_DEFAULTS.items():
+            for new_value in (None, '', '.', value or '/some/dir'):
+                with set_environ(**{name: new_value}):
+                    self._simple_grouping_test()
+
+    def test_config_default_expected_locations(self):
+        files, _ = self._simple_config_test()
+        self.assertEqual(
+            files, make_expected(),
+            'Not all expected locations have been checked')
+
+    def test_config_default_grouping(self):
+        self._simple_grouping_test()
+
+    def _simple_grouping_test(self):
+        expected_groups = make_expected_groups()
+        for name, group in expected_groups.items():
+            for index, existing_path in 
enumerate(group): + result, opts = self._simple_config_test(existing_path) + expected = expected_from_expected_groups(expected_groups, existing_path) + self.assertEqual( + result, expected, + f'The checked locations do not match the expected ({name}, {index})') + self.assertEqual( + opts.outtmpl['default'], '1', + f'The used result value was incorrect ({name}, {index})') + + def _simple_config_test(self, *stop_paths): + encountered = 0 + paths = [] + + def read_file(filename, default=[]): + nonlocal encountered + path = Path(filename) + paths.append(path) + if path in stop_paths: + encountered += 1 + return ['-o', f'{encountered}'] + + with ConfigMock(read_file): + _, opts, _ = parseOpts([], False) + + return paths, opts + + @set_environ() + def test_config_early_exit_commandline(self): + self._early_exit_test(0, '--ignore-config') + + @set_environ() + def test_config_early_exit_files(self): + for index, _ in enumerate(make_expected(), 1): + self._early_exit_test(index) + + def _early_exit_test(self, allowed_reads, *args): + reads = 0 + + def read_file(filename, default=[]): + nonlocal reads + reads += 1 + + if reads > allowed_reads: + self.fail('The remaining config was not ignored') + elif reads == allowed_reads: + return ['--ignore-config'] + + with ConfigMock(read_file): + parseOpts(args, False) + + @set_environ() + def test_config_override_commandline(self): + self._override_test(0, '-o', 'pass') + + @set_environ() + def test_config_override_files(self): + for index, _ in enumerate(make_expected(), 1): + self._override_test(index) + + def _override_test(self, start_index, *args): + index = 0 + + def read_file(filename, default=[]): + nonlocal index + index += 1 + + if index > start_index: + return ['-o', 'fail'] + elif index == start_index: + return ['-o', 'pass'] + + with ConfigMock(read_file): + _, opts, _ = parseOpts(args, False) + + self.assertEqual( + opts.outtmpl['default'], 'pass', + 'The earlier group did not override the later ones') + + +@contextlib.contextmanager +def ConfigMock(read_file=None): + with unittest.mock.patch('yt_dlp.options.Config') as mock: + mock.return_value = Config(create_parser()) + if read_file is not None: + mock.read_file = read_file + + yield mock + + +def make_expected(*filepaths): + return expected_from_expected_groups(_generate_expected_groups(), *filepaths) + + +def make_expected_groups(*filepaths): + return _filter_expected_groups(_generate_expected_groups(), filepaths) + + +def expected_from_expected_groups(expected_groups, *filepaths): + return list(itertools.chain.from_iterable( + _filter_expected_groups(expected_groups, filepaths).values())) + + +def _filter_expected_groups(expected, filepaths): + if not filepaths: + return expected + + result = {} + for group, paths in expected.items(): + new_paths = [] + for path in paths: + new_paths.append(path) + if path in filepaths: + break + + result[group] = new_paths + + return result + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_cookies.py b/test/test_cookies.py new file mode 100644 index 0000000..5282ef6 --- /dev/null +++ b/test/test_cookies.py @@ -0,0 +1,306 @@ +import unittest +from datetime import datetime, timezone + +from yt_dlp import cookies +from yt_dlp.cookies import ( + LenientSimpleCookie, + LinuxChromeCookieDecryptor, + MacChromeCookieDecryptor, + WindowsChromeCookieDecryptor, + _get_linux_desktop_environment, + _LinuxDesktopEnvironment, + parse_safari_cookies, + pbkdf2_sha1, +) + + +class Logger: + def debug(self, message, *args, **kwargs): + 
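+        # The '[verbose]' prefix mirrors yt-dlp's own debug output format.
+        # Note that warning() below forwards to error(), which raises, so any
+        # warning emitted while extracting cookies fails the test loudly.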
print(f'[verbose] {message}') + + def info(self, message, *args, **kwargs): + print(message) + + def warning(self, message, *args, **kwargs): + self.error(message) + + def error(self, message, *args, **kwargs): + raise Exception(message) + + +class MonkeyPatch: + def __init__(self, module, temporary_values): + self._module = module + self._temporary_values = temporary_values + self._backup_values = {} + + def __enter__(self): + for name, temp_value in self._temporary_values.items(): + self._backup_values[name] = getattr(self._module, name) + setattr(self._module, name, temp_value) + + def __exit__(self, exc_type, exc_val, exc_tb): + for name, backup_value in self._backup_values.items(): + setattr(self._module, name, backup_value) + + +class TestCookies(unittest.TestCase): + def test_get_desktop_environment(self): + """ based on https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util_unittest.cc """ + test_cases = [ + ({}, _LinuxDesktopEnvironment.OTHER), + ({'DESKTOP_SESSION': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER), + + ({'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME), + ({'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME), + ({'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4), + ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3), + ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE), + + ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME), + ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3), + ({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4), + + ({'XDG_CURRENT_DESKTOP': 'X-Cinnamon'}, _LinuxDesktopEnvironment.CINNAMON), + ({'XDG_CURRENT_DESKTOP': 'Deepin'}, _LinuxDesktopEnvironment.DEEPIN), + ({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME), + ({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME), + ({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME), + + ({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME), + ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5), + ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '6'}, _LinuxDesktopEnvironment.KDE6), + ({'XDG_CURRENT_DESKTOP': 'KDE'}, _LinuxDesktopEnvironment.KDE4), + ({'XDG_CURRENT_DESKTOP': 'Pantheon'}, _LinuxDesktopEnvironment.PANTHEON), + ({'XDG_CURRENT_DESKTOP': 'UKUI'}, _LinuxDesktopEnvironment.UKUI), + ({'XDG_CURRENT_DESKTOP': 'Unity'}, _LinuxDesktopEnvironment.UNITY), + ({'XDG_CURRENT_DESKTOP': 'Unity:Unity7'}, _LinuxDesktopEnvironment.UNITY), + ({'XDG_CURRENT_DESKTOP': 'Unity:Unity8'}, _LinuxDesktopEnvironment.UNITY), + ] + + for env, expected_desktop_environment in test_cases: + self.assertEqual(_get_linux_desktop_environment(env, Logger()), expected_desktop_environment) + + def test_chrome_cookie_decryptor_linux_derive_key(self): + key = LinuxChromeCookieDecryptor.derive_key(b'abc') + self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17') + + def test_chrome_cookie_decryptor_mac_derive_key(self): + key = MacChromeCookieDecryptor.derive_key(b'abc') + self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY') + + def test_chrome_cookie_decryptor_linux_v10(self): + with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}): + encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" 
\xa7\xb0\xca\xe4\x07\xd6' + value = 'USD' + decryptor = LinuxChromeCookieDecryptor('Chrome', Logger()) + self.assertEqual(decryptor.decrypt(encrypted_value), value) + + def test_chrome_cookie_decryptor_linux_v11(self): + with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}): + encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd' + value = 'tz=Europe.London' + decryptor = LinuxChromeCookieDecryptor('Chrome', Logger()) + self.assertEqual(decryptor.decrypt(encrypted_value), value) + + def test_chrome_cookie_decryptor_windows_v10(self): + with MonkeyPatch(cookies, { + '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&' + }): + encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad=' + value = '32101439' + decryptor = WindowsChromeCookieDecryptor('', Logger()) + self.assertEqual(decryptor.decrypt(encrypted_value), value) + + def test_chrome_cookie_decryptor_mac_v10(self): + with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}): + encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc' + value = '2021-06-01-22' + decryptor = MacChromeCookieDecryptor('', Logger()) + self.assertEqual(decryptor.decrypt(encrypted_value), value) + + def test_safari_cookie_parsing(self): + cookies = \ + b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \ + b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \ + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \ + b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \ + b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \ + b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(' + + jar = parse_safari_cookies(cookies) + self.assertEqual(len(jar), 1) + cookie = list(jar)[0] + self.assertEqual(cookie.domain, 'localhost') + self.assertEqual(cookie.port, None) + self.assertEqual(cookie.path, '/') + self.assertEqual(cookie.name, 'foo') + self.assertEqual(cookie.value, 'test%20%3Bcookie') + self.assertFalse(cookie.secure) + expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc) + self.assertEqual(cookie.expires, int(expected_expiration.timestamp())) + + def test_pbkdf2_sha1(self): + key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16) + self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34') + + +class TestLenientSimpleCookie(unittest.TestCase): + def _run_tests(self, *cases): + for message, raw_cookie, expected in cases: + cookie = LenientSimpleCookie(raw_cookie) + + with self.subTest(message, expected=expected): + self.assertEqual(cookie.keys(), expected.keys(), message) + + for key, expected_value in expected.items(): + morsel = cookie[key] + if isinstance(expected_value, tuple): + expected_value, expected_attributes = expected_value + else: + expected_attributes = {} + + attributes = { + key: value + for key, value in dict(morsel).items() + if value != "" + } + self.assertEqual(attributes, expected_attributes, message) + + self.assertEqual(morsel.value, expected_value, message) + + def test_parsing(self): + 
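+        # Each case below is (description, raw cookie string, expected
+        # mapping); an expected value may be a plain string or a
+        # (value, attributes) tuple, as unpacked by _run_tests() above.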
self._run_tests( + # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py + ( + "Test basic cookie", + "chips=ahoy; vienna=finger", + {"chips": "ahoy", "vienna": "finger"}, + ), + ( + "Test quoted cookie", + 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', + {"keebler": 'E=mc2; L="Loves"; fudge=\012;'}, + ), + ( + "Allow '=' in an unquoted value", + "keebler=E=mc2", + {"keebler": "E=mc2"}, + ), + ( + "Allow cookies with ':' in their name", + "key:term=value:term", + {"key:term": "value:term"}, + ), + ( + "Allow '[' and ']' in cookie values", + "a=b; c=[; d=r; f=h", + {"a": "b", "c": "[", "d": "r", "f": "h"}, + ), + ( + "Test basic cookie attributes", + 'Customer="WILE_E_COYOTE"; Version=1; Path=/acme', + {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}, + ), + ( + "Test flag only cookie attributes", + 'Customer="WILE_E_COYOTE"; HttpOnly; Secure', + {"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})}, + ), + ( + "Test flag only attribute with values", + "eggs=scrambled; httponly=foo; secure=bar; Path=/bacon", + {"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})}, + ), + ( + "Test special case for 'expires' attribute, 4 digit year", + 'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT', + {"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})}, + ), + ( + "Test special case for 'expires' attribute, 2 digit year", + 'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT', + {"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})}, + ), + ( + "Test extra spaces in keys and values", + "eggs = scrambled ; secure ; path = bar ; foo=foo ", + {"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"}, + ), + ( + "Test quoted attributes", + 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"', + {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})} + ), + # Our own tests that CPython passes + ( + "Allow ';' in quoted value", + 'chips="a;hoy"; vienna=finger', + {"chips": "a;hoy", "vienna": "finger"}, + ), + ( + "Keep only the last set value", + "a=c; a=b", + {"a": "b"}, + ), + ) + + def test_lenient_parsing(self): + self._run_tests( + ( + "Ignore and try to skip invalid cookies", + 'chips={"ahoy;": 1}; vienna="finger;"', + {"vienna": "finger;"}, + ), + ( + "Ignore cookies without a name", + "a=b; unnamed; c=d", + {"a": "b", "c": "d"}, + ), + ( + "Ignore '\"' cookie without name", + 'a=b; "; c=d', + {"a": "b", "c": "d"}, + ), + ( + "Skip all space separated values", + "x a=b c=d x; e=f", + {"a": "b", "c": "d", "e": "f"}, + ), + ( + "Skip all space separated values", + 'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x', + {"a": "b", "c": "d"}, + ), + ( + "Expect quote mending", + 'a=b; invalid="; c=d', + {"a": "b", "c": "d"}, + ), + ( + "Reset morsel after invalid to not capture attributes", + "a=b; invalid; Version=1; c=d", + {"a": "b", "c": "d"}, + ), + ( + "Reset morsel after invalid to not capture attributes", + "a=b; $invalid; $Version=1; c=d", + {"a": "b", "c": "d"}, + ), + ( + "Continue after non-flag attribute without value", + "a=b; path; Version=1; c=d", + {"a": "b", "c": "d"}, + ), + ( + "Allow cookie attributes with `$` prefix", + 'Customer="WILE_E_COYOTE"; $Version=1; $Secure; $Path=/acme', + {"Customer": ("WILE_E_COYOTE", {"version": "1", "secure": True, "path": "/acme"})}, + ), + ( + "Invalid Morsel keys should not result in an error", + "Key=Value; [Invalid]=Value; Another=Value", + {"Key": "Value", "Another": "Value"}, + ), + ) diff 
--git a/test/test_download.py b/test/test_download.py new file mode 100755 index 0000000..2530792 --- /dev/null +++ b/test/test_download.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import collections +import hashlib +import json + +from test.helper import ( + assertGreaterEqual, + expect_info_dict, + expect_warnings, + get_params, + gettestcases, + getwebpagetestcases, + is_download_test, + report_warning, + try_rm, +) + +import yt_dlp.YoutubeDL # isort: split +from yt_dlp.extractor import get_info_extractor +from yt_dlp.networking.exceptions import HTTPError, TransportError +from yt_dlp.utils import ( + DownloadError, + ExtractorError, + UnavailableVideoError, + YoutubeDLError, + format_bytes, + join_nonempty, +) + +RETRIES = 3 + + +class YoutubeDL(yt_dlp.YoutubeDL): + def __init__(self, *args, **kwargs): + self.to_stderr = self.to_screen + self.processed_info_dicts = [] + super().__init__(*args, **kwargs) + + def report_warning(self, message, *args, **kwargs): + # Don't accept warnings during tests + raise ExtractorError(message) + + def process_info(self, info_dict): + self.processed_info_dicts.append(info_dict.copy()) + return super().process_info(info_dict) + + +def _file_md5(fn): + with open(fn, 'rb') as f: + return hashlib.md5(f.read()).hexdigest() + + +normal_test_cases = gettestcases() +webpage_test_cases = getwebpagetestcases() +tests_counter = collections.defaultdict(collections.Counter) + + +@is_download_test +class TestDownload(unittest.TestCase): + # Parallel testing in nosetests. See + # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html + _multiprocess_shared_ = True + + maxDiff = None + + COMPLETED_TESTS = {} + + def __str__(self): + """Identify each test with the `add_ie` attribute, if available.""" + cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie + return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:' + + +# Dynamically generate tests + +def generator(test_case, tname): + def test_template(self): + if self.COMPLETED_TESTS.get(tname): + return + self.COMPLETED_TESTS[tname] = True + ie = yt_dlp.extractor.get_info_extractor(test_case['name'])() + other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])] + is_playlist = any(k.startswith('playlist') for k in test_case) + test_cases = test_case.get( + 'playlist', [] if is_playlist else [test_case]) + + def print_skipping(reason): + print('Skipping %s: %s' % (test_case['name'], reason)) + self.skipTest(reason) + + if not ie.working(): + print_skipping('IE marked as not _WORKING') + + for tc in test_cases: + if tc.get('expected_exception'): + continue + info_dict = tc.get('info_dict', {}) + params = tc.get('params', {}) + if not info_dict.get('id'): + raise Exception(f'Test {tname} definition incorrect - "id" key is not present') + elif not info_dict.get('ext') and info_dict.get('_type', 'video') == 'video': + if params.get('skip_download') and params.get('ignore_no_formats_error'): + continue + raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file') + + if 'skip' in test_case: + print_skipping(test_case['skip']) + + for other_ie in other_ies: + if not other_ie.working(): + print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) + + params = 
get_params(test_case.get('params', {})) + params['outtmpl'] = tname + '_' + params['outtmpl'] + if is_playlist and 'playlist' not in test_case: + params.setdefault('extract_flat', 'in_playlist') + params.setdefault('playlistend', test_case.get( + 'playlist_mincount', test_case.get('playlist_count', -2) + 1)) + params.setdefault('skip_download', True) + + ydl = YoutubeDL(params, auto_init=False) + ydl.add_default_info_extractors() + finished_hook_called = set() + + def _hook(status): + if status['status'] == 'finished': + finished_hook_called.add(status['filename']) + ydl.add_progress_hook(_hook) + expect_warnings(ydl, test_case.get('expected_warnings', [])) + + def get_tc_filename(tc): + return ydl.prepare_filename(dict(tc.get('info_dict', {}))) + + res_dict = None + + def match_exception(err): + expected_exception = test_case.get('expected_exception') + if not expected_exception: + return False + if err.__class__.__name__ == expected_exception: + return True + for exc in err.exc_info: + if exc.__class__.__name__ == expected_exception: + return True + return False + + def try_rm_tcs_files(tcs=None): + if tcs is None: + tcs = test_cases + for tc in tcs: + tc_filename = get_tc_filename(tc) + try_rm(tc_filename) + try_rm(tc_filename + '.part') + try_rm(os.path.splitext(tc_filename)[0] + '.info.json') + try_rm_tcs_files() + try: + try_num = 1 + while True: + try: + # We're not using .download here since that is just a shim + # for outside error handling, and returns the exit code + # instead of the result dict. + res_dict = ydl.extract_info( + test_case['url'], + force_generic_extractor=params.get('force_generic_extractor', False)) + except (DownloadError, ExtractorError) as err: + # Check if the exception is not a network related one + if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503): + if match_exception(err): + return + err.msg = f'{getattr(err, "msg", err)} ({tname})' + raise + + if try_num == RETRIES: + report_warning('%s failed due to network errors, skipping...' % tname) + return + + print(f'Retrying: {try_num} failed tries\n\n##########\n\n') + + try_num += 1 + except YoutubeDLError as err: + if match_exception(err): + return + raise + else: + break + + if is_playlist: + self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video']) + self.assertTrue('entries' in res_dict) + expect_info_dict(self, res_dict, test_case.get('info_dict', {})) + + if 'playlist_mincount' in test_case: + assertGreaterEqual( + self, + len(res_dict['entries']), + test_case['playlist_mincount'], + 'Expected at least %d in playlist %s, but got only %d' % ( + test_case['playlist_mincount'], test_case['url'], + len(res_dict['entries']))) + if 'playlist_count' in test_case: + self.assertEqual( + len(res_dict['entries']), + test_case['playlist_count'], + 'Expected %d entries in playlist %s, but got %d.' 
% ( + test_case['playlist_count'], + test_case['url'], + len(res_dict['entries']), + )) + if 'playlist_duration_sum' in test_case: + got_duration = sum(e['duration'] for e in res_dict['entries']) + self.assertEqual( + test_case['playlist_duration_sum'], got_duration) + + # Generalize both playlists and single videos to unified format for + # simplicity + if 'entries' not in res_dict: + res_dict['entries'] = [res_dict] + + for tc_num, tc in enumerate(test_cases): + tc_res_dict = res_dict['entries'][tc_num] + # First, check test cases' data against extracted data alone + expect_info_dict(self, tc_res_dict, tc.get('info_dict', {})) + if tc_res_dict.get('_type', 'video') != 'video': + continue + # Now, check downloaded file consistency + tc_filename = get_tc_filename(tc) + if not test_case.get('params', {}).get('skip_download', False): + self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) + self.assertTrue(tc_filename in finished_hook_called) + expected_minsize = tc.get('file_minsize', 10000) + if expected_minsize is not None: + if params.get('test'): + expected_minsize = max(expected_minsize, 10000) + got_fsize = os.path.getsize(tc_filename) + assertGreaterEqual( + self, got_fsize, expected_minsize, + 'Expected %s to be at least %s, but it\'s only %s ' % + (tc_filename, format_bytes(expected_minsize), + format_bytes(got_fsize))) + if 'md5' in tc: + md5_for_file = _file_md5(tc_filename) + self.assertEqual(tc['md5'], md5_for_file) + # Finally, check test cases' data again but this time against + # extracted data from info JSON file written during processing + info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' + self.assertTrue( + os.path.exists(info_json_fn), + 'Missing info file %s' % info_json_fn) + with open(info_json_fn, encoding='utf-8') as infof: + info_dict = json.load(infof) + expect_info_dict(self, info_dict, tc.get('info_dict', {})) + finally: + try_rm_tcs_files() + if is_playlist and res_dict is not None and res_dict.get('entries'): + # Remove all other files that may have been extracted if the + # extractor returns full results even with extract_flat + res_tcs = [{'info_dict': e} for e in res_dict['entries']] + try_rm_tcs_files(res_tcs) + ydl.close() + return test_template + + +# And add them to TestDownload +def inject_tests(test_cases, label=''): + for test_case in test_cases: + name = test_case['name'] + tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_') + tests_counter[name][label] += 1 + + test_method = generator(test_case, tname) + test_method.__name__ = tname + test_method.add_ie = ','.join(test_case.get('add_ie', [])) + setattr(TestDownload, test_method.__name__, test_method) + + +inject_tests(normal_test_cases) + +# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction +inject_tests(webpage_test_cases, 'webpage') + + +def batch_generator(name): + def test_template(self): + for label, num_tests in tests_counter[name].items(): + for i in range(num_tests): + test_name = join_nonempty('test', name, label, i, delim='_') + try: + getattr(self, test_name)() + except unittest.SkipTest: + print(f'Skipped {test_name}') + + return test_template + + +for name in tests_counter: + test_method = batch_generator(name) + test_method.__name__ = f'test_{name}_all' + test_method.add_ie = '' + setattr(TestDownload, test_method.__name__, test_method) +del test_method + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_downloader_external.py 
b/test/test_downloader_external.py new file mode 100644 index 0000000..62f7d45 --- /dev/null +++ b/test/test_downloader_external.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import http.cookiejar + +from test.helper import FakeYDL +from yt_dlp.downloader.external import ( + Aria2cFD, + AxelFD, + CurlFD, + FFmpegFD, + HttpieFD, + WgetFD, +) + +TEST_COOKIE = { + 'version': 0, + 'name': 'test', + 'value': 'ytdlp', + 'port': None, + 'port_specified': False, + 'domain': '.example.com', + 'domain_specified': True, + 'domain_initial_dot': False, + 'path': '/', + 'path_specified': True, + 'secure': False, + 'expires': None, + 'discard': False, + 'comment': None, + 'comment_url': None, + 'rest': {}, +} + +TEST_INFO = {'url': 'http://www.example.com/'} + + +class TestHttpieFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = HttpieFD(ydl, {}) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['http', '--download', '--output', 'test', 'http://www.example.com/']) + + # Test cookie header is added + ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['http', '--download', '--output', 'test', 'http://www.example.com/', 'Cookie:test=ytdlp']) + + +class TestAxelFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = AxelFD(ydl, {}) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['axel', '-o', 'test', '--', 'http://www.example.com/']) + + # Test cookie header is added + ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) + self.assertEqual( + downloader._make_cmd('test', TEST_INFO), + ['axel', '-o', 'test', '-H', 'Cookie: test=ytdlp', '--max-redirect=0', '--', 'http://www.example.com/']) + + +class TestWgetFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = WgetFD(ydl, {}) + self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) + # Test cookiejar tempfile arg is added + ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) + self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO)) + + +class TestCurlFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = CurlFD(ydl, {}) + self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO)) + # Test cookie header is added + ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) + self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO)) + self.assertIn('test=ytdlp', downloader._make_cmd('test', TEST_INFO)) + + +class TestAria2cFD(unittest.TestCase): + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = Aria2cFD(ydl, {}) + downloader._make_cmd('test', TEST_INFO) + self.assertFalse(hasattr(downloader, '_cookies_tempfile')) + + # Test cookiejar tempfile arg is added + ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) + cmd = downloader._make_cmd('test', TEST_INFO) + self.assertIn(f'--load-cookies={downloader._cookies_tempfile}', cmd) + + +@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found') +class TestFFmpegFD(unittest.TestCase): + _args = [] + + def _test_cmd(self, args): + self._args = args + + def test_make_cmd(self): + with FakeYDL() as ydl: + downloader = FFmpegFD(ydl, {}) + downloader._debug_cmd = self._test_cmd + + 
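+            # With _debug_cmd patched above, the argv that _call_downloader()
+            # builds is captured in self._args instead of being logged, so the
+            # exact ffmpeg command line can be asserted on below.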
downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'}) + self.assertEqual(self._args, [ + 'ffmpeg', '-y', '-hide_banner', '-i', 'http://www.example.com/', + '-c', 'copy', '-f', 'mp4', 'file:test']) + + # Test cookies arg is added + ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE)) + downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'}) + self.assertEqual(self._args, [ + 'ffmpeg', '-y', '-hide_banner', '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n', + '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test']) + + # Test with non-url input (ffmpeg reads from stdin '-' for websockets) + downloader._call_downloader('test', {'url': 'x', 'ext': 'mp4'}) + self.assertEqual(self._args, [ + 'ffmpeg', '-y', '-hide_banner', '-i', 'x', '-c', 'copy', '-f', 'mp4', 'file:test']) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py new file mode 100644 index 0000000..099ec2f --- /dev/null +++ b/test/test_downloader_http.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import http.server +import re +import threading + +from test.helper import http_server_port, try_rm +from yt_dlp import YoutubeDL +from yt_dlp.downloader.http import HttpFD +from yt_dlp.utils import encodeFilename +from yt_dlp.utils._utils import _YDLLogger as FakeLogger + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +TEST_SIZE = 10 * 1024 + + +class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def send_content_range(self, total=None): + range_header = self.headers.get('Range') + start = end = None + if range_header: + mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header) + if mobj: + start = int(mobj.group(1)) + end = int(mobj.group(2)) + valid_range = start is not None and end is not None + if valid_range: + content_range = 'bytes %d-%d' % (start, end) + if total: + content_range += '/%d' % total + self.send_header('Content-Range', content_range) + return (end - start + 1) if valid_range else total + + def serve(self, range=True, content_length=True): + self.send_response(200) + self.send_header('Content-Type', 'video/mp4') + size = TEST_SIZE + if range: + size = self.send_content_range(TEST_SIZE) + if content_length: + self.send_header('Content-Length', size) + self.end_headers() + self.wfile.write(b'#' * size) + + def do_GET(self): + if self.path == '/regular': + self.serve() + elif self.path == '/no-content-length': + self.serve(content_length=False) + elif self.path == '/no-range': + self.serve(range=False) + elif self.path == '/no-range-no-content-length': + self.serve(range=False, content_length=False) + else: + assert False + + +class TestHttpFD(unittest.TestCase): + def setUp(self): + self.httpd = http.server.HTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def download(self, params, ep): + params['logger'] = FakeLogger() + ydl = YoutubeDL(params) + downloader = HttpFD(ydl, params) + filename = 'testfile.mp4' + try_rm(encodeFilename(filename)) + self.assertTrue(downloader.real_download(filename, { + 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep), + }), ep) + 
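+        # Every server variant must yield exactly TEST_SIZE bytes on disk,
+        # whether or not the endpoint advertises Range or Content-Length
+        # support.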
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) + try_rm(encodeFilename(filename)) + + def download_all(self, params): + for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): + self.download(params, ep) + + def test_regular(self): + self.download_all({}) + + def test_chunked(self): + self.download_all({ + 'http_chunk_size': 1000, + }) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_execution.py b/test/test_execution.py new file mode 100644 index 0000000..c6ee9cf --- /dev/null +++ b/test/test_execution.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import contextlib +import subprocess + +from yt_dlp.utils import Popen + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py' + + +class TestExecution(unittest.TestCase): + def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )): + stdout, stderr, returncode = Popen.run( + [*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + print(stderr, file=sys.stderr) + self.assertEqual(returncode, 0) + return stdout.strip(), stderr.strip() + + def test_main_exec(self): + self.run_yt_dlp() + + def test_import(self): + self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp')) + + def test_module_exec(self): + self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp')) + + def test_cmdline_umlauts(self): + _, stderr = self.run_yt_dlp(opts=('ä', '--version')) + self.assertFalse(stderr) + + def test_lazy_extractors(self): + try: + subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS], + cwd=rootDir, stdout=subprocess.DEVNULL) + self.assertTrue(os.path.exists(LAZY_EXTRACTORS)) + + _, stderr = self.run_yt_dlp(opts=('-s', 'test:')) + # `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated Python versions + if stderr and stderr.startswith('Deprecated Feature: Support for Python'): + stderr = '' + self.assertFalse(stderr) + + subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL) + finally: + with contextlib.suppress(OSError): + os.remove(LAZY_EXTRACTORS) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py new file mode 100644 index 0000000..47c632a --- /dev/null +++ b/test/test_iqiyi_sdk_interpreter.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import FakeYDL, is_download_test +from yt_dlp.extractor import IqiyiIE + + +class WarningLogger: + def __init__(self): + self.messages = [] + + def warning(self, msg): + self.messages.append(msg) + + def debug(self, msg): + pass + + def error(self, msg): + pass + + +@is_download_test +class TestIqiyiSDKInterpreter(unittest.TestCase): + def test_iqiyi_sdk_interpreter(self): + ''' + Test the functionality of IqiyiSDKInterpreter by trying to log in + + If `sign` is incorrect, /validate call throws an HTTP 556 error + ''' + logger = WarningLogger() + ie = IqiyiIE(FakeYDL({'logger': logger})) + ie._perform_login('foo', 'bar') + self.assertTrue('unable to log in:' in logger.messages[0]) + + +if 
__name__ == '__main__': + unittest.main() diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py new file mode 100644 index 0000000..86928a6 --- /dev/null +++ b/test/test_jsinterp.py @@ -0,0 +1,380 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import math + +from yt_dlp.jsinterp import JS_Undefined, JSInterpreter + + +class NaN: + pass + + +class TestJSInterpreter(unittest.TestCase): + def _test(self, jsi_or_code, expected, func='f', args=()): + if isinstance(jsi_or_code, str): + jsi_or_code = JSInterpreter(jsi_or_code) + got = jsi_or_code.call_function(func, *args) + if expected is NaN: + self.assertTrue(math.isnan(got), f'{got} is not NaN') + else: + self.assertEqual(got, expected) + + def test_basic(self): + jsi = JSInterpreter('function f(){;}') + self.assertEqual(repr(jsi.extract_function('f')), 'F<f>') + self._test(jsi, None) + + self._test('function f(){return 42;}', 42) + self._test('function f(){42}', None) + self._test('var f = function(){return 42;}', 42) + + def test_add(self): + self._test('function f(){return 42 + 7;}', 49) + self._test('function f(){return 42 + undefined;}', NaN) + self._test('function f(){return 42 + null;}', 42) + + def test_sub(self): + self._test('function f(){return 42 - 7;}', 35) + self._test('function f(){return 42 - undefined;}', NaN) + self._test('function f(){return 42 - null;}', 42) + + def test_mul(self): + self._test('function f(){return 42 * 7;}', 294) + self._test('function f(){return 42 * undefined;}', NaN) + self._test('function f(){return 42 * null;}', 0) + + def test_div(self): + jsi = JSInterpreter('function f(a, b){return a / b;}') + self._test(jsi, NaN, args=(0, 0)) + self._test(jsi, NaN, args=(JS_Undefined, 1)) + self._test(jsi, float('inf'), args=(2, 0)) + self._test(jsi, 0, args=(0, 3)) + + def test_mod(self): + self._test('function f(){return 42 % 7;}', 0) + self._test('function f(){return 42 % 0;}', NaN) + self._test('function f(){return 42 % undefined;}', NaN) + + def test_exp(self): + self._test('function f(){return 42 ** 2;}', 1764) + self._test('function f(){return 42 ** undefined;}', NaN) + self._test('function f(){return 42 ** null;}', 1) + self._test('function f(){return undefined ** 42;}', NaN) + + def test_calc(self): + self._test('function f(a){return 2*a+1;}', 7, args=[3]) + + def test_empty_return(self): + self._test('function f(){return; y()}', None) + + def test_morespace(self): + self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3]) + self._test('function f () { x = 2 ; return x; }', 2) + + def test_strange_chars(self): + self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }', + 21, args=[20], func='$_xY1') + + def test_operators(self): + self._test('function f(){return 1 << 5;}', 32) + self._test('function f(){return 2 ** 5}', 32) + self._test('function f(){return 19 & 21;}', 17) + self._test('function f(){return 11 >> 2;}', 2) + self._test('function f(){return []? 2+3: 4;}', 5) + self._test('function f(){return 1 == 2}', False) + self._test('function f(){return 0 && 1 || 2;}', 2) + self._test('function f(){return 0 ?? 
42;}', 0) + self._test('function f(){return "life, the universe and everything" < 42;}', False) + + def test_array_access(self): + self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7]) + + def test_parens(self): + self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7) + self._test('function f(){return (1 + 2) * 3;}', 9) + + def test_quotes(self): + self._test(R'function f(){return "a\"\\("}', R'a"\(') + + def test_assignments(self): + self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31) + self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51) + self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11) + + @unittest.skip('Not implemented') + def test_comments(self): + self._test(''' + function f() { + var x = /* 1 + */ 2; + var y = /* 30 + * 40 */ 50; + return x + y; + } + ''', 52) + + self._test(''' + function f() { + var x = "/*"; + var y = 1 /* comment */ + 2; + return y; + } + ''', 3) + + def test_precedence(self): + self._test(''' + function f() { + var a = [10, 20, 30, 40, 50]; + var b = 6; + a[0]=a[b%a.length]; + return a; + } + ''', [20, 20, 30, 40, 50]) + + def test_builtins(self): + self._test('function f() { return NaN }', NaN) + + def test_date(self): + self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000) + + jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }') + self._test(jsi, 86000, args=['Wednesday 31 December 1969 18:01:26 MDT']) + self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT']) # m/d/y + self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC']) + + def test_call(self): + jsi = JSInterpreter(''' + function x() { return 2; } + function y(a) { return x() + (a?a:0); } + function z() { return y(3); } + ''') + self._test(jsi, 5, func='z') + self._test(jsi, 2, func='y') + + def test_if(self): + self._test(''' + function f() { + let a = 9; + if (0==0) {a++} + return a + } + ''', 10) + + self._test(''' + function f() { + if (0==0) {return 10} + } + ''', 10) + + self._test(''' + function f() { + if (0!=0) {return 1} + else {return 10} + } + ''', 10) + + """ # Unsupported + self._test(''' + function f() { + if (0!=0) {return 1} + else if (1==0) {return 2} + else {return 10} + } + ''', 10) + """ + + def test_for_loop(self): + self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10) + + def test_switch(self): + jsi = JSInterpreter(''' + function f(x) { switch(x){ + case 1:x+=1; + case 2:x+=2; + case 3:x+=3;break; + case 4:x+=4; + default:x=0; + } return x } + ''') + self._test(jsi, 7, args=[1]) + self._test(jsi, 6, args=[3]) + self._test(jsi, 0, args=[5]) + + def test_switch_default(self): + jsi = JSInterpreter(''' + function f(x) { switch(x){ + case 2: x+=2; + default: x-=1; + case 5: + case 6: x+=6; + case 0: break; + case 1: x+=1; + } return x } + ''') + self._test(jsi, 2, args=[1]) + self._test(jsi, 11, args=[5]) + self._test(jsi, 14, args=[9]) + + def test_try(self): + self._test('function f() { try{return 10} catch(e){return 5} }', 10) + + def test_catch(self): + self._test('function f() { try{throw 10} catch(e){return 5} }', 5) + + def test_finally(self): + self._test('function f() { try{throw 10} finally {return 42} }', 42) + self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42) + + def test_nested_try(self): + self._test(''' + function f() {try { + try{throw 10} finally {throw 42} + } catch(e){return 5} } + ''', 5) + + def test_for_loop_continue(self): + 
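+        # Editorial note, not in upstream: `continue` transfers control to the
+        # loop's update expression (i++), so the `a++` after it never executes
+        # and the function returns the initial value 0.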
self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0) + + def test_for_loop_break(self): + self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0) + + def test_for_loop_try(self): + self._test(''' + function f() { + for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} }; + return 42 } + ''', 42) + + def test_literal_list(self): + self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7]) + + def test_comma(self): + self._test('function f() { a=5; a -= 1, a+=3; return a }', 7) + self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7) + self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5) + + def test_void(self): + self._test('function f() { return void 42; }', None) + + def test_return_function(self): + jsi = JSInterpreter(''' + function f() { return [1, function(){return 1}][1] } + ''') + self.assertEqual(jsi.call_function('f')([]), 1) + + def test_null(self): + self._test('function f() { return null; }', None) + self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }', + [False, False, False, False]) + self._test('function f() { return [null >= 0, null <= 0]; }', [True, True]) + + def test_undefined(self): + self._test('function f() { return undefined === undefined; }', True) + self._test('function f() { return undefined; }', JS_Undefined) + self._test('function f() {return undefined ?? 42; }', 42) + self._test('function f() { let v; return v; }', JS_Undefined) + self._test('function f() { let v; return v**0; }', 1) + self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }', + [False, False, JS_Undefined, JS_Undefined]) + + self._test(''' + function f() { return [ + undefined === undefined, + undefined == undefined, + undefined == null, + undefined < undefined, + undefined > undefined, + undefined === 0, + undefined == 0, + undefined < 0, + undefined > 0, + undefined >= 0, + undefined <= 0, + undefined > null, + undefined < null, + undefined === null + ]; } + ''', list(map(bool, (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)))) + + jsi = JSInterpreter(''' + function f() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; } + ''') + for y in jsi.call_function('f'): + self.assertTrue(math.isnan(y)) + + def test_object(self): + self._test('function f() { return {}; }', {}) + self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0]) + self._test('function f() { let a; return a?.qq; }', JS_Undefined) + self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined) + + def test_regex(self): + self._test('function f() { let a=/,,[/,913,/](,)}/; }', None) + self._test('function f() { let a=/,,[/,913,/](,)}/; return a; }', R'/,,[/,913,/](,)}/0') + + R''' # We are not compiling regex + jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/; return a; }') + self.assertIsInstance(jsi.call_function('f'), re.Pattern) + + jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/i; return a; }') + self.assertEqual(jsi.call_function('f').flags & re.I, re.I) + + jsi = JSInterpreter(R'function f() { let a=/,][}",],()}(\[)/; return a; }') + self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)') + + jsi = JSInterpreter(R'function f() { let a=[/[)\\]/]; return a[0]; }') + self.assertEqual(jsi.call_function('f').pattern, r'[)\\]') + ''' + + @unittest.skip('Not implemented') + def test_replace(self): + self._test('function f() { let 
a="data-name".replace("data-", ""); return a }', + 'name') + self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }', + 'name') + self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }', + 'name') + self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }', + 'doto-nome') + self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }', + 'doto-nome') + + def test_char_code_at(self): + jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}') + self._test(jsi, 116, args=[0]) + self._test(jsi, 101, args=[1]) + self._test(jsi, 115, args=[2]) + self._test(jsi, 116, args=[3]) + self._test(jsi, None, args=[4]) + self._test(jsi, 116, args=['not_a_number']) + + def test_bitwise_operators_overflow(self): + self._test('function f(){return -524999584 << 5}', 379882496) + self._test('function f(){return 1236566549 << 5}', 915423904) + + def test_bitwise_operators_typecast(self): + self._test('function f(){return null << 5}', 0) + self._test('function f(){return undefined >> 5}', 0) + self._test('function f(){return 42 << NaN}', 42) + + def test_negative(self): + self._test('function f(){return 2 * -2.0 ;}', -4) + self._test('function f(){return 2 - - -2 ;}', 0) + self._test('function f(){return 2 - - - -2 ;}', 4) + self._test('function f(){return 2 - + + - -2;}', 0) + self._test('function f(){return 2 + - + - -2;}', 0) + + @unittest.skip('Not implemented') + def test_packed(self): + jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''') + self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 
3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_netrc.py b/test/test_netrc.py new file mode 100644 index 0000000..dc708d9 --- /dev/null +++ b/test/test_netrc.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from yt_dlp.extractor import gen_extractor_classes +from yt_dlp.extractor.common import InfoExtractor + +NO_LOGIN = InfoExtractor._perform_login + + +class TestNetRc(unittest.TestCase): + def test_netrc_present(self): + for ie in gen_extractor_classes(): + if ie._perform_login is NO_LOGIN: + continue + self.assertTrue( + ie._NETRC_MACHINE, + 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_networking.py b/test/test_networking.py new file mode 100644 index 0000000..628f1f1 --- /dev/null +++ 
b/test/test_networking.py @@ -0,0 +1,1631 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import gzip +import http.client +import http.cookiejar +import http.server +import io +import logging +import pathlib +import random +import ssl +import tempfile +import threading +import time +import urllib.error +import urllib.request +import warnings +import zlib +from email.message import Message +from http.cookiejar import CookieJar + +from test.helper import FakeYDL, http_server_port, verify_address_availability +from yt_dlp.cookies import YoutubeDLCookieJar +from yt_dlp.dependencies import brotli, requests, urllib3 +from yt_dlp.networking import ( + HEADRequest, + PUTRequest, + Request, + RequestDirector, + RequestHandler, + Response, +) +from yt_dlp.networking._urllib import UrllibRH +from yt_dlp.networking.exceptions import ( + CertificateVerifyError, + HTTPError, + IncompleteRead, + NoSupportingHandlers, + ProxyError, + RequestError, + SSLError, + TransportError, + UnsupportedRequest, +) +from yt_dlp.utils._utils import _YDLLogger as FakeLogger +from yt_dlp.utils.networking import HTTPHeaderDict + +from test.conftest import validate_and_send + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def _build_proxy_handler(name): + class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): + proxy_name = name + + def log_message(self, format, *args): + pass + + def do_GET(self): + self.send_response(200) + self.send_header('Content-Type', 'text/plain; charset=utf-8') + self.end_headers() + self.wfile.write(f'{self.proxy_name}: {self.path}'.encode()) + return HTTPTestRequestHandler + + +class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): + protocol_version = 'HTTP/1.1' + + def log_message(self, format, *args): + pass + + def _headers(self): + payload = str(self.headers).encode() + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _redirect(self): + self.send_response(int(self.path[len('/redirect_'):])) + self.send_header('Location', '/method') + self.send_header('Content-Length', '0') + self.end_headers() + + def _method(self, method, payload=None): + self.send_response(200) + self.send_header('Content-Length', str(len(payload or ''))) + self.send_header('Method', method) + self.end_headers() + if payload: + self.wfile.write(payload) + + def _status(self, status): + payload = f'<html>{status} NOT FOUND</html>'.encode() + self.send_response(int(status)) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + + def _read_data(self): + if 'Content-Length' in self.headers: + return self.rfile.read(int(self.headers['Content-Length'])) + + def do_POST(self): + data = self._read_data() + str(self.headers).encode() + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('POST', data) + elif self.path.startswith('/headers'): + self._headers() + else: + self._status(404) + + def do_HEAD(self): + if self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('HEAD') + else: + self._status(404) + + def do_PUT(self): + data = self._read_data() + str(self.headers).encode() + if 
self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('PUT', data) + else: + self._status(404) + + def do_GET(self): + if self.path == '/video.html': + payload = b'<html><video src="/vid.mp4" /></html>' + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + elif self.path == '/vid.mp4': + payload = b'\x00\x00\x00\x00\x20\x66\x74[video]' + self.send_response(200) + self.send_header('Content-Type', 'video/mp4') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + elif self.path == '/%E4%B8%AD%E6%96%87.html': + payload = b'<html><video src="/vid.mp4" /></html>' + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + elif self.path == '/%c7%9f': + payload = b'<html><video src="/vid.mp4" /></html>' + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + elif self.path.startswith('/redirect_loop'): + self.send_response(301) + self.send_header('Location', self.path) + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/redirect_dotsegments': + self.send_response(301) + # redirect to /headers but with dot segments before + self.send_header('Location', '/a/b/./../../headers') + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/redirect_dotsegments_absolute': + self.send_response(301) + # redirect to /headers but with dot segments before - absolute url + self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers') + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path.startswith('/redirect_'): + self._redirect() + elif self.path.startswith('/method'): + self._method('GET', str(self.headers).encode()) + elif self.path.startswith('/headers'): + self._headers() + elif self.path.startswith('/308-to-headers'): + self.send_response(308) + self.send_header('Location', '/headers') + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/trailing_garbage': + payload = b'<html><video src="/vid.mp4" /></html>' + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Encoding', 'gzip') + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode='wb') as f: + f.write(payload) + compressed = buf.getvalue() + b'trailing garbage' + self.send_header('Content-Length', str(len(compressed))) + self.end_headers() + self.wfile.write(compressed) + elif self.path == '/302-non-ascii-redirect': + new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html' + self.send_response(301) + self.send_header('Location', new_url) + self.send_header('Content-Length', '0') + self.end_headers() + elif self.path == '/content-encoding': + encodings = self.headers.get('ytdl-encoding', '') + payload = b'<html><video src="/vid.mp4" /></html>' + for encoding in filter(None, (e.strip() for e in encodings.split(','))): + if encoding == 'br' and brotli: + payload = brotli.compress(payload) + elif encoding == 'gzip': + buf = io.BytesIO() + with gzip.GzipFile(fileobj=buf, mode='wb') as f: + 
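+                    # Editorial note, not in upstream: encodings are applied in
+                    # the order requested, so 'ytdl-encoding: gzip, deflate'
+                    # yields deflate(gzip(payload)) and clients must decode in
+                    # reverse order, which test_multiple_encodings relies on.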
f.write(payload) + payload = buf.getvalue() + elif encoding == 'deflate': + payload = zlib.compress(payload) + elif encoding == 'unsupported': + payload = b'raw' + break + else: + self._status(415) + return + self.send_response(200) + self.send_header('Content-Encoding', encodings) + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + elif self.path.startswith('/gen_'): + payload = b'<html></html>' + self.send_response(int(self.path[len('/gen_'):])) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + elif self.path.startswith('/incompleteread'): + payload = b'<html></html>' + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', '234234') + self.end_headers() + self.wfile.write(payload) + self.finish() + elif self.path.startswith('/timeout_'): + time.sleep(int(self.path[len('/timeout_'):])) + self._headers() + elif self.path == '/source_address': + payload = str(self.client_address[0]).encode() + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + self.finish() + else: + self._status(404) + + def send_header(self, keyword, value): + """ + Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers. + This is against what is defined in RFC 3986, however we need to test we support this + since some sites incorrectly do this. + """ + if keyword.lower() == 'connection': + return super().send_header(keyword, value) + + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + + self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode()) + + +class TestRequestHandlerBase: + @classmethod + def setup_class(cls): + cls.http_httpd = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + cls.http_port = http_server_port(cls.http_httpd) + cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever) + # FIXME: we should probably stop the http server thread after each test + # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041 + cls.http_server_thread.daemon = True + cls.http_server_thread.start() + + # HTTPS server + certfn = os.path.join(TEST_DIR, 'testcert.pem') + cls.https_httpd = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True) + cls.https_port = http_server_port(cls.https_httpd) + cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever) + cls.https_server_thread.daemon = True + cls.https_server_thread.start() + + +class TestHTTPRequestHandler(TestRequestHandlerBase): + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_verify_cert(self, handler): + with handler() as rh: + with pytest.raises(CertificateVerifyError): + validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) + + with handler(verify=False) as rh: + r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) + assert r.status == 200 + r.close() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_ssl_error(self, 
handler): + # HTTPS server with too old TLS version + # XXX: is there a better way to test this than to create a new server? + https_httpd = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True) + https_port = http_server_port(https_httpd) + https_server_thread = threading.Thread(target=https_httpd.serve_forever) + https_server_thread.daemon = True + https_server_thread.start() + + with handler(verify=False) as rh: + with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: + validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) + assert not issubclass(exc_info.type, CertificateVerifyError) + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_percent_encode(self, handler): + with handler() as rh: + # Unicode characters should be encoded with uppercase percent-encoding + res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html')) + assert res.status == 200 + res.close() + # don't normalize existing percent encodings + res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f')) + assert res.status == 200 + res.close() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('path', [ + '/a/b/./../../headers', + '/redirect_dotsegments', + # https://github.com/yt-dlp/yt-dlp/issues/9020 + '/redirect_dotsegments_absolute', + ]) + def test_remove_dot_segments(self, handler, path): + with handler(verbose=True) as rh: + # This isn't a comprehensive test, + # but it should be enough to check whether the handler is removing dot segments in required scenarios + res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}')) + assert res.status == 200 + assert res.url == f'http://127.0.0.1:{self.http_port}/headers' + res.close() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_unicode_path_redirection(self, handler): + with handler() as rh: + r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect')) + assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' + r.close() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_raise_http_error(self, handler): + with handler() as rh: + for bad_status in (400, 500, 599, 302): + with pytest.raises(HTTPError): + validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status))) + + # Should not raise an error + validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_response_url(self, handler): + with handler() as rh: + # Response url should be that of the last url in redirect chain + res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301')) + assert res.url == f'http://127.0.0.1:{self.http_port}/method' + res.close() + res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')) + assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200' + res2.close() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_redirect(self, handler): + with handler() as rh: + def do_req(redirect_status, method, assert_no_content=False): + data = b'testdata' if 
method in ('POST', 'PUT') else None + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data)) + + headers = b'' + data_sent = b'' + if data is not None: + data_sent += res.read(len(data)) + if data_sent != data: + headers += data_sent + data_sent = b'' + + headers += res.read() + + if assert_no_content or data is None: + assert b'Content-Type' not in headers + assert b'Content-Length' not in headers + else: + assert b'Content-Type' in headers + assert b'Content-Length' in headers + + return data_sent.decode(), res.headers.get('method', '') + + # A 303 must either use GET or HEAD for subsequent request + assert do_req(303, 'POST', True) == ('', 'GET') + assert do_req(303, 'HEAD') == ('', 'HEAD') + + assert do_req(303, 'PUT', True) == ('', 'GET') + + # 301 and 302 turn POST only into a GET + assert do_req(301, 'POST', True) == ('', 'GET') + assert do_req(301, 'HEAD') == ('', 'HEAD') + assert do_req(302, 'POST', True) == ('', 'GET') + assert do_req(302, 'HEAD') == ('', 'HEAD') + + assert do_req(301, 'PUT') == ('testdata', 'PUT') + assert do_req(302, 'PUT') == ('testdata', 'PUT') + + # 307 and 308 should not change method + for m in ('POST', 'PUT'): + assert do_req(307, m) == ('testdata', m) + assert do_req(308, m) == ('testdata', m) + + assert do_req(307, 'HEAD') == ('', 'HEAD') + assert do_req(308, 'HEAD') == ('', 'HEAD') + + # These should not redirect and instead raise an HTTPError + for code in (300, 304, 305, 306): + with pytest.raises(HTTPError): + do_req(code, 'GET') + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_request_cookie_header(self, handler): + # We should accept a Cookie header being passed as in normal headers and handle it appropriately. 
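+        # Editorial sketch, not part of the upstream test: baseline check that
+        # the /headers echo endpoint reports no Cookie header when none is sent,
+        # using only helpers already in scope here (handler, Request,
+        # validate_and_send, self.http_port).
+        with handler() as rh:
+            res = validate_and_send(
+                rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().decode()
+            assert 'Cookie: test=test' not in res
+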
+ with handler() as rh: + # Specified Cookie header should be used + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/headers', + headers={'Cookie': 'test=test'})).read().decode() + assert 'Cookie: test=test' in res + + # Specified Cookie header should be removed on any redirect + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/308-to-headers', + headers={'Cookie': 'test=test'})).read().decode() + assert 'Cookie: test=test' not in res + + # Specified Cookie header should override global cookiejar for that request + cookiejar = YoutubeDLCookieJar() + cookiejar.set_cookie(http.cookiejar.Cookie( + version=0, name='test', value='ytdlp', port=None, port_specified=False, + domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/', + path_specified=True, secure=False, expires=None, discard=False, comment=None, + comment_url=None, rest={})) + + with handler(cookiejar=cookiejar) as rh: + data = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read() + assert b'Cookie: test=ytdlp' not in data + assert b'Cookie: test=test' in data + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_redirect_loop(self, handler): + with handler() as rh: + with pytest.raises(HTTPError, match='redirect loop'): + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_incompleteread(self, handler): + with handler(timeout=2) as rh: + with pytest.raises(IncompleteRead): + validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_cookies(self, handler): + cookiejar = YoutubeDLCookieJar() + cookiejar.set_cookie(http.cookiejar.Cookie( + 0, 'test', 'ytdlp', None, False, '127.0.0.1', True, + False, '/headers', True, False, None, False, None, None, {})) + + with handler(cookiejar=cookiejar) as rh: + data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() + assert b'Cookie: test=ytdlp' in data + + # Per request + with handler() as rh: + data = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() + assert b'Cookie: test=ytdlp' in data + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_headers(self, handler): + + with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: + # Global Headers + data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() + assert b'Test1: test' in data + + # Per request headers, merged with global + data = validate_and_send(rh, Request( + f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read() + assert b'Test1: test' in data + assert b'Test2: changed' in data + assert b'Test2: test2' not in data + assert b'Test3: test3' in data + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_timeout(self, handler): + with handler() as rh: + # Default timeout is 20 seconds, so this should go through + validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3')) + + with handler(timeout=0.5) as rh: + with pytest.raises(TransportError): + validate_and_send( + rh, 
Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) + + # Per request timeout, should override handler timeout + validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_source_address(self, handler): + source_address = f'127.0.0.{random.randint(5, 255)}' + # on some systems these loopback addresses we need for testing may not be available + # see: https://github.com/yt-dlp/yt-dlp/issues/8890 + verify_address_availability(source_address) + with handler(source_address=source_address) as rh: + data = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode() + assert source_address == data + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_gzip_trailing_garbage(self, handler): + with handler() as rh: + data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode() + assert data == '<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.skipif(not brotli, reason='brotli support is not installed') + def test_brotli(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/content-encoding', + headers={'ytdl-encoding': 'br'})) + assert res.headers.get('Content-Encoding') == 'br' + assert res.read() == b'<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_deflate(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/content-encoding', + headers={'ytdl-encoding': 'deflate'})) + assert res.headers.get('Content-Encoding') == 'deflate' + assert res.read() == b'<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_gzip(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/content-encoding', + headers={'ytdl-encoding': 'gzip'})) + assert res.headers.get('Content-Encoding') == 'gzip' + assert res.read() == b'<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_multiple_encodings(self, handler): + with handler() as rh: + for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'): + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/content-encoding', + headers={'ytdl-encoding': pair})) + assert res.headers.get('Content-Encoding') == pair + assert res.read() == b'<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_unsupported_encoding(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/content-encoding', + headers={'ytdl-encoding': 'unsupported'})) + assert res.headers.get('Content-Encoding') == 'unsupported' + assert res.read() == b'raw' + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_read(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers')) + assert res.readable() + assert res.read(1) == b'H' + assert res.read(3) == b'ost' + + +class 
TestHTTPProxy(TestRequestHandlerBase): + @classmethod + def setup_class(cls): + super().setup_class() + # HTTP Proxy server + cls.proxy = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), _build_proxy_handler('normal')) + cls.proxy_port = http_server_port(cls.proxy) + cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever) + cls.proxy_thread.daemon = True + cls.proxy_thread.start() + + # Geo proxy server + cls.geo_proxy = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), _build_proxy_handler('geo')) + cls.geo_port = http_server_port(cls.geo_proxy) + cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever) + cls.geo_proxy_thread.daemon = True + cls.geo_proxy_thread.start() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_http_proxy(self, handler): + http_proxy = f'http://127.0.0.1:{self.proxy_port}' + geo_proxy = f'http://127.0.0.1:{self.geo_port}' + + # Test global http proxy + # Test per request http proxy + # Test per request http proxy disables proxy + url = 'http://foo.com/bar' + + # Global HTTP proxy + with handler(proxies={'http': http_proxy}) as rh: + res = validate_and_send(rh, Request(url)).read().decode() + assert res == f'normal: {url}' + + # Per request proxy overrides global + res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode() + assert res == f'geo: {url}' + + # and setting to None disables all proxies for that request + real_url = f'http://127.0.0.1:{self.http_port}/headers' + res = validate_and_send( + rh, Request(real_url, proxies={'http': None})).read().decode() + assert res != f'normal: {real_url}' + assert 'Accept' in res + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_noproxy(self, handler): + with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh: + # NO_PROXY + for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'): + nop_response = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode( + 'utf-8') + assert 'Accept' in nop_response + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_allproxy(self, handler): + url = 'http://foo.com/bar' + with handler() as rh: + response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode( + 'utf-8') + assert response == f'normal: {url}' + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_http_proxy_with_idn(self, handler): + with handler(proxies={ + 'http': f'http://127.0.0.1:{self.proxy_port}', + }) as rh: + url = 'http://中文.tw/' + response = rh.send(Request(url)).read().decode() + # b'xn--fiq228c' is '中文'.encode('idna') + assert response == 'normal: http://xn--fiq228c.tw/' + + +class TestClientCertificate: + + @classmethod + def setup_class(cls): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate') + cacertfn = os.path.join(cls.certdir, 'ca.crt') + cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.verify_mode = ssl.CERT_REQUIRED + sslctx.load_verify_locations(cafile=cacertfn) + sslctx.load_cert_chain(certfn, None) + cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True) + cls.port = http_server_port(cls.httpd) + cls.server_thread = 
threading.Thread(target=cls.httpd.serve_forever) + cls.server_thread.daemon = True + cls.server_thread.start() + + def _run_test(self, handler, **handler_kwargs): + with handler( + # Disable client-side validation of unacceptable self-signed testcert.pem + # The test is of a check on the server side, so unaffected + verify=False, + **handler_kwargs, + ) as rh: + validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode() + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_certificate_combined_nopass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'), + }) + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_certificate_nocombined_nopass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'client.crt'), + 'client_certificate_key': os.path.join(self.certdir, 'client.key'), + }) + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_certificate_combined_pass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'), + 'client_certificate_password': 'foobar', + }) + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_certificate_nocombined_pass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'client.crt'), + 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'), + 'client_certificate_password': 'foobar', + }) + + +class TestRequestHandlerMisc: + """Misc generic tests for request handlers, not related to request or validation testing""" + @pytest.mark.parametrize('handler,logger_name', [ + ('Requests', 'urllib3'), + ('Websockets', 'websockets.client'), + ('Websockets', 'websockets.server') + ], indirect=['handler']) + def test_remove_logging_handler(self, handler, logger_name): + # Ensure any logging handlers, which may contain a YoutubeDL instance, + # are removed when we close the request handler + # See: https://github.com/yt-dlp/yt-dlp/issues/8922 + logging_handlers = logging.getLogger(logger_name).handlers + before_count = len(logging_handlers) + rh = handler() + assert len(logging_handlers) == before_count + 1 + rh.close() + assert len(logging_handlers) == before_count + + +class TestUrllibRequestHandler(TestRequestHandlerBase): + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_file_urls(self, handler): + # See https://github.com/ytdl-org/youtube-dl/issues/8227 + tf = tempfile.NamedTemporaryFile(delete=False) + tf.write(b'foobar') + tf.close() + req = Request(pathlib.Path(tf.name).as_uri()) + with handler() as rh: + with pytest.raises(UnsupportedRequest): + rh.validate(req) + + # Test that urllib never loaded FileHandler + with pytest.raises(TransportError): + rh.send(req) + + with handler(enable_file_urls=True) as rh: + res = validate_and_send(rh, req) + assert res.read() == b'foobar' + res.close() + + os.unlink(tf.name) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_http_error_returns_content(self, handler): + # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost + def get_response(): + with handler() as rh: + # headers url + try: + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404')) + except 
HTTPError as e: + return e.response + + assert get_response().read() == b'<html></html>' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_verify_cert_error_text(self, handler): + # Check the output of the error message + with handler() as rh: + with pytest.raises( + CertificateVerifyError, + match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate' + ): + validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + @pytest.mark.parametrize('req,match,version_check', [ + # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256 + # bpo-39603: Check implemented in 3.7.9+, 3.8.5+ + ( + Request('http://127.0.0.1', method='GET\n'), + 'method can\'t contain control characters', + lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5) + ), + # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265 + # bpo-38576: Check implemented in 3.7.8+, 3.8.3+ + ( + Request('http://127.0.0. 1', method='GET'), + 'URL can\'t contain control characters', + lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3) + ), + # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50 + (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None), + ]) + def test_httplib_validation_errors(self, handler, req, match, version_check): + if version_check and version_check(sys.version_info): + pytest.skip(f'Python {sys.version} version does not have the required validation for this test.') + + with handler() as rh: + with pytest.raises(RequestError, match=match) as exc_info: + validate_and_send(rh, req) + assert not isinstance(exc_info.value, TransportError) + + +@pytest.mark.parametrize('handler', ['Requests'], indirect=True) +class TestRequestsRequestHandler(TestRequestHandlerBase): + @pytest.mark.parametrize('raised,expected', [ + (lambda: requests.exceptions.ConnectTimeout(), TransportError), + (lambda: requests.exceptions.ReadTimeout(), TransportError), + (lambda: requests.exceptions.Timeout(), TransportError), + (lambda: requests.exceptions.ConnectionError(), TransportError), + (lambda: requests.exceptions.ProxyError(), ProxyError), + (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError), + (lambda: requests.exceptions.SSLError(), SSLError), + (lambda: requests.exceptions.InvalidURL(), RequestError), + (lambda: requests.exceptions.InvalidHeader(), RequestError), + # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535 + (lambda: urllib3.exceptions.HTTPError(), TransportError), + (lambda: requests.exceptions.RequestException(), RequestError) + # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object + ]) + def test_request_error_mapping(self, handler, monkeypatch, raised, expected): + with handler() as rh: + def mock_get_instance(*args, **kwargs): + class MockSession: + def request(self, *args, **kwargs): + raise raised() + return MockSession() + + monkeypatch.setattr(rh, '_get_instance', mock_get_instance) + + with pytest.raises(expected) as exc_info: + rh.send(Request('http://fake')) + + assert exc_info.type is expected + + @pytest.mark.parametrize('raised,expected,match', [ + (lambda: urllib3.exceptions.SSLError(), SSLError, None), + (lambda: urllib3.exceptions.TimeoutError(), 
TransportError, None), + (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None), + (lambda: urllib3.exceptions.ProtocolError(), TransportError, None), + (lambda: urllib3.exceptions.DecodeError(), TransportError, None), + (lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all + ( + lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)), + IncompleteRead, + '3 bytes read, 4 more expected' + ), + ( + lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)), + IncompleteRead, + '3 bytes read, 5 more expected' + ), + ]) + def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match): + from requests.models import Response as RequestsResponse + from urllib3.response import HTTPResponse as Urllib3Response + + from yt_dlp.networking._requests import RequestsResponseAdapter + requests_res = RequestsResponse() + requests_res.raw = Urllib3Response(body=b'', status=200) + res = RequestsResponseAdapter(requests_res) + + def mock_read(*args, **kwargs): + raise raised() + monkeypatch.setattr(res.fp, 'read', mock_read) + + with pytest.raises(expected, match=match) as exc_info: + res.read() + + assert exc_info.type is expected + + def test_close(self, handler, monkeypatch): + rh = handler() + session = rh._get_instance(cookiejar=rh.cookiejar) + called = False + original_close = session.close + + def mock_close(*args, **kwargs): + nonlocal called + called = True + return original_close(*args, **kwargs) + + monkeypatch.setattr(session, 'close', mock_close) + rh.close() + assert called + + +def run_validation(handler, error, req, **handler_kwargs): + with handler(**handler_kwargs) as rh: + if error: + with pytest.raises(error): + rh.validate(req) + else: + rh.validate(req) + + +class TestRequestHandlerValidation: + + class ValidationRH(RequestHandler): + def _send(self, request): + raise RequestError('test') + + class NoCheckRH(ValidationRH): + _SUPPORTED_FEATURES = None + _SUPPORTED_PROXY_SCHEMES = None + _SUPPORTED_URL_SCHEMES = None + + def _check_extensions(self, extensions): + extensions.clear() + + class HTTPSupportedRH(ValidationRH): + _SUPPORTED_URL_SCHEMES = ('http',) + + URL_SCHEME_TESTS = [ + # scheme, expected to fail, handler kwargs + ('Urllib', [ + ('http', False, {}), + ('https', False, {}), + ('data', False, {}), + ('ftp', False, {}), + ('file', UnsupportedRequest, {}), + ('file', False, {'enable_file_urls': True}), + ]), + ('Requests', [ + ('http', False, {}), + ('https', False, {}), + ]), + ('Websockets', [ + ('ws', False, {}), + ('wss', False, {}), + ]), + (NoCheckRH, [('http', False, {})]), + (ValidationRH, [('http', UnsupportedRequest, {})]) + ] + + PROXY_SCHEME_TESTS = [ + # scheme, expected to fail + ('Urllib', 'http', [ + ('http', False), + ('https', UnsupportedRequest), + ('socks4', False), + ('socks4a', False), + ('socks5', False), + ('socks5h', False), + ('socks', UnsupportedRequest), + ]), + ('Requests', 'http', [ + ('http', False), + ('https', False), + ('socks4', False), + ('socks4a', False), + ('socks5', False), + ('socks5h', False), + ]), + (NoCheckRH, 'http', [('http', False)]), + (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]), + ('Websockets', 'ws', [('http', UnsupportedRequest)]), + (NoCheckRH, 'http', [('http', False)]), + (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]), + ] + + PROXY_KEY_TESTS = [ + # key, expected to fail + ('Urllib', [ + ('all', False), + ('unrelated', False), 
+ ]), + ('Requests', [ + ('all', False), + ('unrelated', False), + ]), + (NoCheckRH, [('all', False)]), + (HTTPSupportedRH, [('all', UnsupportedRequest)]), + (HTTPSupportedRH, [('no', UnsupportedRequest)]), + ] + + EXTENSION_TESTS = [ + ('Urllib', 'http', [ + ({'cookiejar': 'notacookiejar'}, AssertionError), + ({'cookiejar': YoutubeDLCookieJar()}, False), + ({'cookiejar': CookieJar()}, AssertionError), + ({'timeout': 1}, False), + ({'timeout': 'notatimeout'}, AssertionError), + ({'unsupported': 'value'}, UnsupportedRequest), + ]), + ('Requests', 'http', [ + ({'cookiejar': 'notacookiejar'}, AssertionError), + ({'cookiejar': YoutubeDLCookieJar()}, False), + ({'timeout': 1}, False), + ({'timeout': 'notatimeout'}, AssertionError), + ({'unsupported': 'value'}, UnsupportedRequest), + ]), + (NoCheckRH, 'http', [ + ({'cookiejar': 'notacookiejar'}, False), + ({'somerandom': 'test'}, False), # but any extension is allowed through + ]), + ('Websockets', 'ws', [ + ({'cookiejar': YoutubeDLCookieJar()}, False), + ({'timeout': 2}, False), + ]), + ] + + @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [ + (handler_tests[0], scheme, fail, handler_kwargs) + for handler_tests in URL_SCHEME_TESTS + for scheme, fail, handler_kwargs in handler_tests[1] + + ], indirect=['handler']) + def test_url_scheme(self, handler, scheme, fail, handler_kwargs): + run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {})) + + @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler']) + def test_no_proxy(self, handler, fail): + run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'})) + run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'}) + + @pytest.mark.parametrize('handler,proxy_key,fail', [ + (handler_tests[0], proxy_key, fail) + for handler_tests in PROXY_KEY_TESTS + for proxy_key, fail in handler_tests[1] + ], indirect=['handler']) + def test_proxy_key(self, handler, proxy_key, fail): + run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'})) + run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'}) + + @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [ + (handler_tests[0], handler_tests[1], scheme, fail) + for handler_tests in PROXY_SCHEME_TESTS + for scheme, fail in handler_tests[2] + ], indirect=['handler']) + def test_proxy_scheme(self, handler, req_scheme, scheme, fail): + run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'})) + run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'}) + + @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True) + def test_empty_proxy(self, handler): + run_validation(handler, False, Request('http://', proxies={'http': None})) + run_validation(handler, False, Request('http://'), proxies={'http': None}) + + @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c']) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + def test_invalid_proxy_url(self, handler, proxy_url): + run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url})) + + @pytest.mark.parametrize('handler,scheme,extensions,fail', [ + (handler_tests[0], handler_tests[1], extensions, fail) + for handler_tests in EXTENSION_TESTS + for extensions, fail in 
handler_tests[2] + ], indirect=['handler']) + def test_extension(self, handler, scheme, extensions, fail): + run_validation( + handler, fail, Request(f'{scheme}://', extensions=extensions)) + + def test_invalid_request_type(self): + rh = self.ValidationRH(logger=FakeLogger()) + for method in (rh.validate, rh.send): + with pytest.raises(TypeError, match='Expected an instance of Request'): + method('not a request') + + +class FakeResponse(Response): + def __init__(self, request): + # XXX: we could make request part of standard response interface + self.request = request + super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url) + + +class FakeRH(RequestHandler): + + def _validate(self, request): + return + + def _send(self, request: Request): + if request.url.startswith('ssl://'): + raise SSLError(request.url[len('ssl://'):]) + return FakeResponse(request) + + +class FakeRHYDL(FakeYDL): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._request_director = self.build_request_director([FakeRH]) + + +class AllUnsupportedRHYDL(FakeYDL): + + def __init__(self, *args, **kwargs): + + class UnsupportedRH(RequestHandler): + def _send(self, request: Request): + pass + + _SUPPORTED_FEATURES = () + _SUPPORTED_PROXY_SCHEMES = () + _SUPPORTED_URL_SCHEMES = () + + super().__init__(*args, **kwargs) + self._request_director = self.build_request_director([UnsupportedRH]) + + +class TestRequestDirector: + + def test_handler_operations(self): + director = RequestDirector(logger=FakeLogger()) + handler = FakeRH(logger=FakeLogger()) + director.add_handler(handler) + assert director.handlers.get(FakeRH.RH_KEY) is handler + + # Handler should overwrite + handler2 = FakeRH(logger=FakeLogger()) + director.add_handler(handler2) + assert director.handlers.get(FakeRH.RH_KEY) is not handler + assert director.handlers.get(FakeRH.RH_KEY) is handler2 + assert len(director.handlers) == 1 + + class AnotherFakeRH(FakeRH): + pass + director.add_handler(AnotherFakeRH(logger=FakeLogger())) + assert len(director.handlers) == 2 + assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY + + director.handlers.pop(FakeRH.RH_KEY, None) + assert director.handlers.get(FakeRH.RH_KEY) is None + assert len(director.handlers) == 1 + + # RequestErrors should passthrough + with pytest.raises(SSLError): + director.send(Request('ssl://something')) + + def test_send(self): + director = RequestDirector(logger=FakeLogger()) + with pytest.raises(RequestError): + director.send(Request('any://')) + director.add_handler(FakeRH(logger=FakeLogger())) + assert isinstance(director.send(Request('http://')), FakeResponse) + + def test_unsupported_handlers(self): + class SupportedRH(RequestHandler): + _SUPPORTED_URL_SCHEMES = ['http'] + + def _send(self, request: Request): + return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url) + + director = RequestDirector(logger=FakeLogger()) + director.add_handler(SupportedRH(logger=FakeLogger())) + director.add_handler(FakeRH(logger=FakeLogger())) + + # First should take preference + assert director.send(Request('http://')).read() == b'supported' + assert director.send(Request('any://')).read() == b'' + + director.handlers.pop(FakeRH.RH_KEY) + with pytest.raises(NoSupportingHandlers): + director.send(Request('any://')) + + def test_unexpected_error(self): + director = RequestDirector(logger=FakeLogger()) + + class UnexpectedRH(FakeRH): + def _send(self, request: Request): + raise TypeError('something') + + 
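+        # Editorial note, not in upstream: UnexpectedRH raises a plain TypeError
+        # rather than a RequestError; the director surfaces it through
+        # NoSupportingHandlers ('1 unexpected error') instead of letting it
+        # propagate, and it is non-fatal once a working handler is registered.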
director.add_handler(UnexpectedRH(logger=FakeLogger())) + with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'): + director.send(Request('any://')) + + director.handlers.clear() + assert len(director.handlers) == 0 + + # An unexpected error should not be fatal if another handler succeeds + director.add_handler(FakeRH(logger=FakeLogger())) + director.add_handler(UnexpectedRH(logger=FakeLogger())) + assert director.send(Request('any://')) + + def test_preference(self): + director = RequestDirector(logger=FakeLogger()) + director.add_handler(FakeRH(logger=FakeLogger())) + + class SomeRH(RequestHandler): + _SUPPORTED_URL_SCHEMES = ['http'] + + def _send(self, request: Request): + return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url) + + def some_preference(rh, request): + return (0 if not isinstance(rh, SomeRH) + else 100 if 'prefer' in request.headers + else -1) + + director.add_handler(SomeRH(logger=FakeLogger())) + director.preferences.add(some_preference) + + assert director.send(Request('http://')).read() == b'' + assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported' + + def test_close(self, monkeypatch): + director = RequestDirector(logger=FakeLogger()) + director.add_handler(FakeRH(logger=FakeLogger())) + called = False + + def mock_close(*args, **kwargs): + nonlocal called + called = True + + monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close) + director.close() + assert called + + +# XXX: do we want to move this to test_YoutubeDL.py? +class TestYoutubeDLNetworking: + + @staticmethod + def build_handler(ydl, handler: RequestHandler = FakeRH): + return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY) + + def test_compat_opener(self): + with FakeYDL() as ydl: + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=DeprecationWarning) + assert isinstance(ydl._opener, urllib.request.OpenerDirector) + + @pytest.mark.parametrize('proxy,expected', [ + ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}), + ('', {'all': '__noproxy__'}), + (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # taken from env; the http proxy is also applied to https + ]) + def test_proxy(self, proxy, expected): + old_http_proxy = os.environ.get('HTTP_PROXY') + try: + os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081' # ensure that provided proxies override env + with FakeYDL({'proxy': proxy}) as ydl: + assert ydl.proxies == expected + finally: + if old_http_proxy: + os.environ['HTTP_PROXY'] = old_http_proxy + + def test_compat_request(self): + with FakeRHYDL() as ydl: + assert ydl.urlopen('test://') + urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'}) + urllib_req.add_unredirected_header('Cookie', 'bob=bob') + urllib_req.timeout = 2 + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=DeprecationWarning) + req = ydl.urlopen(urllib_req).request + assert req.url == urllib_req.get_full_url() + assert req.data == urllib_req.data + assert req.method == urllib_req.get_method() + assert 'X-Test' in req.headers + assert 'Cookie' in req.headers + assert req.extensions.get('timeout') == 2 + + with pytest.raises(AssertionError): + ydl.urlopen(None) + + def test_extract_basic_auth(self): + with FakeRHYDL() as ydl: + res = ydl.urlopen(Request('http://user:pass@foo.bar')) + assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz' + + def test_sanitize_url(self): + with FakeRHYDL() as ydl: + res = ydl.urlopen(Request('httpss://foo.bar')) + assert res.request.url == 
'https://foo.bar' + + def test_file_urls_error(self): + # use urllib handler + with FakeYDL() as ydl: + with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'): + ydl.urlopen('file://') + + @pytest.mark.parametrize('scheme', (['ws', 'wss'])) + def test_websocket_unavailable_error(self, scheme): + with AllUnsupportedRHYDL() as ydl: + with pytest.raises(RequestError, match=r'This request requires WebSocket support'): + ydl.urlopen(f'{scheme}://') + + def test_legacy_server_connect_error(self): + with FakeRHYDL() as ydl: + for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'): + with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'): + ydl.urlopen(f'ssl://{error}') + + with pytest.raises(SSLError, match='testerror'): + ydl.urlopen('ssl://testerror') + + @pytest.mark.parametrize('proxy_key,proxy_url,expected', [ + ('http', '__noproxy__', None), + ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'), + ('https', 'example.com', 'http://example.com'), + ('https', '//example.com', 'http://example.com'), + ('https', 'socks5://example.com', 'socks5h://example.com'), + ('http', 'socks://example.com', 'socks4://example.com'), + ('http', 'socks4://example.com', 'socks4://example.com'), + ('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies + ]) + def test_clean_proxy(self, proxy_key, proxy_url, expected): + # proxies should be cleaned in urlopen() + with FakeRHYDL() as ydl: + req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request + assert req.proxies[proxy_key] == expected + + # and should also be cleaned when building the handler + env_key = f'{proxy_key.upper()}_PROXY' + old_env_proxy = os.environ.get(env_key) + try: + os.environ[env_key] = proxy_url # ensure that provided proxies override env + with FakeYDL() as ydl: + rh = self.build_handler(ydl) + assert rh.proxies[proxy_key] == expected + finally: + if old_env_proxy: + os.environ[env_key] = old_env_proxy + + def test_clean_proxy_header(self): + with FakeRHYDL() as ydl: + req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request + assert 'ytdl-request-proxy' not in req.headers + assert req.proxies == {'all': 'http://foo.bar'} + + with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl: + rh = self.build_handler(ydl) + assert 'ytdl-request-proxy' not in rh.headers + assert rh.proxies == {'all': 'http://foo.bar'} + + def test_clean_header(self): + with FakeRHYDL() as ydl: + res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True})) + assert 'Youtubedl-no-compression' not in res.request.headers + assert res.request.headers.get('Accept-Encoding') == 'identity' + + with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl: + rh = self.build_handler(ydl) + assert 'Youtubedl-no-compression' not in rh.headers + assert rh.headers.get('Accept-Encoding') == 'identity' + + with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl: + rh = self.build_handler(ydl) + assert 'Ytdl-socks-proxy' not in rh.headers + + def test_build_handler_params(self): + with FakeYDL({ + 'http_headers': {'test': 'testtest'}, + 'socket_timeout': 2, + 'proxy': 'http://127.0.0.1:8080', + 'source_address': '127.0.0.45', + 'debug_printtraffic': True, + 'compat_opts': ['no-certifi'], + 'nocheckcertificate': True, + 'legacyserverconnect': True, + }) as ydl: + rh = self.build_handler(ydl) + assert rh.headers.get('test') == 'testtest' + assert 
'Accept' in rh.headers # ensure std_headers are still there + assert rh.timeout == 2 + assert rh.proxies.get('all') == 'http://127.0.0.1:8080' + assert rh.source_address == '127.0.0.45' + assert rh.verbose is True + assert rh.prefer_system_certs is True + assert rh.verify is False + assert rh.legacy_ssl_support is True + + @pytest.mark.parametrize('ydl_params', [ + {'client_certificate': 'fakecert.crt'}, + {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'}, + {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'}, + {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'}, + ]) + def test_client_certificate(self, ydl_params): + with FakeYDL(ydl_params) as ydl: + rh = self.build_handler(ydl) + assert rh._client_cert == ydl_params # XXX: Too bound to implementation + + def test_urllib_file_urls(self): + with FakeYDL({'enable_file_urls': False}) as ydl: + rh = self.build_handler(ydl, UrllibRH) + assert rh.enable_file_urls is False + + with FakeYDL({'enable_file_urls': True}) as ydl: + rh = self.build_handler(ydl, UrllibRH) + assert rh.enable_file_urls is True + + def test_compat_opt_prefer_urllib(self): + # This assumes urllib only has a preference when this compat opt is given + with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl: + director = ydl.build_request_director([UrllibRH]) + assert len(director.preferences) == 1 + assert director.preferences.pop()(UrllibRH, None) + + +class TestRequest: + + def test_query(self): + req = Request('http://example.com?q=something', query={'v': 'xyz'}) + assert req.url == 'http://example.com?q=something&v=xyz' + + req.update(query={'v': '123'}) + assert req.url == 'http://example.com?q=something&v=123' + req.update(url='http://example.com', query={'v': 'xyz'}) + assert req.url == 'http://example.com?v=xyz' + + def test_method(self): + req = Request('http://example.com') + assert req.method == 'GET' + req.data = b'test' + assert req.method == 'POST' + req.data = None + assert req.method == 'GET' + req.data = b'test2' + req.method = 'PUT' + assert req.method == 'PUT' + req.data = None + assert req.method == 'PUT' + with pytest.raises(TypeError): + req.method = 1 + + def test_request_helpers(self): + assert HEADRequest('http://example.com').method == 'HEAD' + assert PUTRequest('http://example.com').method == 'PUT' + + def test_headers(self): + req = Request('http://example.com', headers={'tesT': 'test'}) + assert req.headers == HTTPHeaderDict({'test': 'test'}) + req.update(headers={'teSt2': 'test2'}) + assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'}) + + req.headers = new_headers = HTTPHeaderDict({'test': 'test'}) + assert req.headers == HTTPHeaderDict({'test': 'test'}) + assert req.headers is new_headers + + # test converts dict to case insensitive dict + req.headers = new_headers = {'test2': 'test2'} + assert isinstance(req.headers, HTTPHeaderDict) + assert req.headers is not new_headers + + with pytest.raises(TypeError): + req.headers = None + + def test_data_type(self): + req = Request('http://example.com') + assert req.data is None + # test bytes is allowed + req.data = b'test' + assert req.data == b'test' + # test iterable of bytes is allowed + i = [b'test', b'test2'] + req.data = i + assert req.data == i + + # test file-like object is allowed + f = io.BytesIO(b'test') + req.data = f + assert req.data == f + + # common mistake: test str not allowed + with pytest.raises(TypeError): + req.data 
= 'test' + assert req.data != 'test' + + # common mistake: test dict is not allowed + with pytest.raises(TypeError): + req.data = {'test': 'test'} + assert req.data != {'test': 'test'} + + def test_content_length_header(self): + req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'') + assert req.headers.get('Content-Length') == '0' + + req.data = b'test' + assert 'Content-Length' not in req.headers + + req = Request('http://example.com', headers={'Content-Length': '10'}) + assert 'Content-Length' not in req.headers + + def test_content_type_header(self): + req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test') + assert req.headers.get('Content-Type') == 'test' + req.data = b'test2' + assert req.headers.get('Content-Type') == 'test' + req.data = None + assert 'Content-Type' not in req.headers + req.data = b'test3' + assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded' + + def test_update_req(self): + req = Request('http://example.com') + assert req.data is None + assert req.method == 'GET' + assert 'Content-Type' not in req.headers + # Test that zero-byte payloads will be sent + req.update(data=b'') + assert req.data == b'' + assert req.method == 'POST' + assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded' + + def test_proxies(self): + req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'}) + assert req.proxies == {'http': 'http://127.0.0.1:8080'} + + def test_extensions(self): + req = Request(url='http://example.com', extensions={'timeout': 2}) + assert req.extensions == {'timeout': 2} + + def test_copy(self): + req = Request( + url='http://example.com', + extensions={'cookiejar': CookieJar()}, + headers={'Accept-Encoding': 'br'}, + proxies={'http': 'http://127.0.0.1'}, + data=[b'123'] + ) + req_copy = req.copy() + assert req_copy is not req + assert req_copy.url == req.url + assert req_copy.headers == req.headers + assert req_copy.headers is not req.headers + assert req_copy.proxies == req.proxies + assert req_copy.proxies is not req.proxies + + # Data is not able to be copied + assert req_copy.data == req.data + assert req_copy.data is req.data + + # Shallow copy extensions + assert req_copy.extensions is not req.extensions + assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar'] + + # Subclasses are copied by default + class AnotherRequest(Request): + pass + + req = AnotherRequest(url='http://127.0.0.1') + assert isinstance(req.copy(), AnotherRequest) + + def test_url(self): + req = Request(url='https://фtest.example.com/ some spaceв?ä=c',) + assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c' + + assert Request(url='//example.com').url == 'http://example.com' + + with pytest.raises(TypeError): + Request(url='https://').url = None + + +class TestResponse: + + @pytest.mark.parametrize('reason,status,expected', [ + ('custom', 200, 'custom'), + (None, 404, 'Not Found'), # fallback status + ('', 403, 'Forbidden'), + (None, 999, None) + ]) + def test_reason(self, reason, status, expected): + res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason) + assert res.reason == expected + + def test_headers(self): + headers = Message() + headers.add_header('Test', 'test') + headers.add_header('Test', 'test2') + headers.add_header('content-encoding', 'br') + res = Response(io.BytesIO(b''), headers=headers, url='test://') + assert res.headers.get_all('test') == ['test', 'test2'] + assert 
'Content-Encoding' in res.headers + + def test_get_header(self): + headers = Message() + headers.add_header('Set-Cookie', 'cookie1') + headers.add_header('Set-cookie', 'cookie2') + headers.add_header('Test', 'test') + headers.add_header('Test', 'test2') + res = Response(io.BytesIO(b''), headers=headers, url='test://') + assert res.get_header('test') == 'test, test2' + assert res.get_header('set-Cookie') == 'cookie1' + assert res.get_header('notexist', 'default') == 'default' + + def test_compat(self): + res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'}) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=DeprecationWarning) + assert res.code == res.getcode() == res.status + assert res.geturl() == res.url + assert res.info() is res.headers + assert res.getheader('test') == res.get_header('test') diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py new file mode 100644 index 0000000..b7b7143 --- /dev/null +++ b/test/test_networking_utils.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import io +import random +import ssl + +from yt_dlp.cookies import YoutubeDLCookieJar +from yt_dlp.dependencies import certifi +from yt_dlp.networking import Response +from yt_dlp.networking._helper import ( + InstanceStoreMixin, + add_accept_encoding_header, + get_redirect_method, + make_socks_proxy_opts, + select_proxy, + ssl_load_certs, +) +from yt_dlp.networking.exceptions import ( + HTTPError, + IncompleteRead, +) +from yt_dlp.socks import ProxyType +from yt_dlp.utils.networking import HTTPHeaderDict + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +class TestNetworkingUtils: + + def test_select_proxy(self): + proxies = { + 'all': 'socks5://example.com', + 'http': 'http://example.com:1080', + 'no': 'bypass.example.com,yt-dl.org' + } + + assert select_proxy('https://example.com', proxies) == proxies['all'] + assert select_proxy('http://example.com', proxies) == proxies['http'] + assert select_proxy('http://bypass.example.com', proxies) is None + assert select_proxy('https://yt-dl.org', proxies) is None + + @pytest.mark.parametrize('socks_proxy,expected', [ + ('socks5h://example.com', { + 'proxytype': ProxyType.SOCKS5, + 'addr': 'example.com', + 'port': 1080, + 'rdns': True, + 'username': None, + 'password': None + }), + ('socks5://user:@example.com:5555', { + 'proxytype': ProxyType.SOCKS5, + 'addr': 'example.com', + 'port': 5555, + 'rdns': False, + 'username': 'user', + 'password': '' + }), + ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', { + 'proxytype': ProxyType.SOCKS4, + 'addr': '127.0.0.1', + 'port': 1080, + 'rdns': False, + 'username': 'u@ser', + 'password': 'pa ss' + }), + ('socks4a://:pa%20ss@127.0.0.1', { + 'proxytype': ProxyType.SOCKS4A, + 'addr': '127.0.0.1', + 'port': 1080, + 'rdns': True, + 'username': '', + 'password': 'pa ss' + }) + ]) + def test_make_socks_proxy_opts(self, socks_proxy, expected): + assert make_socks_proxy_opts(socks_proxy) == expected + + def test_make_socks_proxy_unknown(self): + with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'): + make_socks_proxy_opts('socks://127.0.0.1') + + @pytest.mark.skipif(not certifi, reason='certifi is not installed') + def test_load_certifi(self): + context_certifi = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context_certifi.load_verify_locations(cafile=certifi.where()) + context = 
ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ssl_load_certs(context, use_certifi=True) + assert context.get_ca_certs() == context_certifi.get_ca_certs() + + context_default = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context_default.load_default_certs() + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ssl_load_certs(context, use_certifi=False) + assert context.get_ca_certs() == context_default.get_ca_certs() + + if context_default.get_ca_certs() == context_certifi.get_ca_certs(): + pytest.skip('System uses certifi as default. The test is not valid') + + @pytest.mark.parametrize('method,status,expected', [ + ('GET', 303, 'GET'), + ('HEAD', 303, 'HEAD'), + ('PUT', 303, 'GET'), + ('POST', 301, 'GET'), + ('HEAD', 301, 'HEAD'), + ('POST', 302, 'GET'), + ('HEAD', 302, 'HEAD'), + ('PUT', 302, 'PUT'), + ('POST', 308, 'POST'), + ('POST', 307, 'POST'), + ('HEAD', 308, 'HEAD'), + ('HEAD', 307, 'HEAD'), + ]) + def test_get_redirect_method(self, method, status, expected): + assert get_redirect_method(method, status) == expected + + @pytest.mark.parametrize('headers,supported_encodings,expected', [ + ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}), + ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}), + ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}), + ]) + def test_add_accept_encoding_header(self, headers, supported_encodings, expected): + headers = HTTPHeaderDict(headers) + add_accept_encoding_header(headers, supported_encodings) + assert headers == HTTPHeaderDict(expected) + + +class TestInstanceStoreMixin: + + class FakeInstanceStoreMixin(InstanceStoreMixin): + def _create_instance(self, **kwargs): + return random.randint(0, 1000000) + + def _close_instance(self, instance): + pass + + def test_mixin(self): + mixin = self.FakeInstanceStoreMixin() + assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) + + assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) + + assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}}) + + assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) + + assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4]) + + cookiejar = YoutubeDLCookieJar() + assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar) + + assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar()) + + # Different order + assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar) + + m = mixin._get_instance(t=1234) + assert mixin._get_instance(t=1234) == m + mixin._clear_instances() + assert mixin._get_instance(t=1234) != m + + +class TestNetworkingExceptions: + + @staticmethod + def create_response(status): + return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status) + + def test_http_error(self): + + response = self.create_response(403) + error = HTTPError(response) + + assert error.status == 403 + assert str(error) == error.msg == 'HTTP Error 403: Forbidden' + assert error.reason == response.reason + assert error.response is response + + data = error.response.read() + assert data == b'test' + assert repr(error) == '<HTTPError 403: Forbidden>' + + def 
test_redirect_http_error(self): + response = self.create_response(301) + error = HTTPError(response, redirect_loop=True) + assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)' + assert error.reason == 'Moved Permanently' + + def test_incomplete_read_error(self): + error = IncompleteRead(4, 3, cause='test') + assert isinstance(error, IncompleteRead) + assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>' + assert str(error) == error.msg == '4 bytes read, 3 more expected' + assert error.partial == 4 + assert error.expected == 3 + assert error.cause == 'test' + + error = IncompleteRead(3) + assert repr(error) == '<IncompleteRead: 3 bytes read>' + assert str(error) == '3 bytes read' diff --git a/test/test_overwrites.py b/test/test_overwrites.py new file mode 100644 index 0000000..6954c07 --- /dev/null +++ b/test/test_overwrites.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import subprocess + +from test.helper import is_download_test, try_rm + +root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +download_file = os.path.join(root_dir, 'test.webm') + + +@is_download_test +class TestOverwrites(unittest.TestCase): + def setUp(self): + # create an empty file + open(download_file, 'a').close() + + def test_default_overwrites(self): + outp = subprocess.Popen( + [ + sys.executable, 'yt_dlp/__main__.py', + '-o', 'test.webm', + 'https://www.youtube.com/watch?v=jNQXAC9IVRw' + ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue(b'has already been downloaded' in sout) + # if the file has no content, it has not been redownloaded + self.assertTrue(os.path.getsize(download_file) < 1) + + def test_yes_overwrites(self): + outp = subprocess.Popen( + [ + sys.executable, 'yt_dlp/__main__.py', '--yes-overwrites', + '-o', 'test.webm', + 'https://www.youtube.com/watch?v=jNQXAC9IVRw' + ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue(b'has already been downloaded' not in sout) + # if the file has no content, it has not been redownloaded + self.assertTrue(os.path.getsize(download_file) > 1) + + def tearDown(self): + try_rm(os.path.join(root_dir, 'test.webm')) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_plugins.py b/test/test_plugins.py new file mode 100644 index 0000000..6cde579 --- /dev/null +++ b/test/test_plugins.py @@ -0,0 +1,73 @@ +import importlib +import os +import shutil +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata') +sys.path.append(str(TEST_DATA_DIR)) +importlib.invalidate_caches() + +from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins + + +class TestPlugins(unittest.TestCase): + + TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME + + def test_directories_containing_plugins(self): + self.assertIn(self.TEST_PLUGIN_DIR, map(Path, directories())) + + def test_extractor_classes(self): + for module_name in tuple(sys.modules): + if module_name.startswith(f'{PACKAGE_NAME}.extractor'): + del sys.modules[module_name] + plugins_ie = load_plugins('extractor', 'IE') + + self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys()) + 
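# the extractor class from the 'normal' plugin module should be registered + 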
self.assertIn('NormalPluginIE', plugins_ie.keys()) + + # don't load modules with underscore prefix + self.assertFalse( + f'{PACKAGE_NAME}.extractor._ignore' in sys.modules.keys(), + 'loaded module beginning with underscore') + self.assertNotIn('IgnorePluginIE', plugins_ie.keys()) + + # Don't load extractors with underscore prefix + self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys()) + + # Don't load extractors not specified in __all__ (if supplied) + self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys()) + self.assertIn('InAllPluginIE', plugins_ie.keys()) + + def test_postprocessor_classes(self): + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('NormalPluginPP', plugins_pp.keys()) + + def test_importing_zipped_module(self): + zip_path = TEST_DATA_DIR / 'zipped_plugins.zip' + shutil.make_archive(str(zip_path)[:-4], 'zip', str(zip_path)[:-4]) + sys.path.append(str(zip_path)) # add zip to search paths + importlib.invalidate_caches() # reset the import caches + + try: + for plugin_type in ('extractor', 'postprocessor'): + package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}') + self.assertIn(zip_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__)) + + plugins_ie = load_plugins('extractor', 'IE') + self.assertIn('ZippedPluginIE', plugins_ie.keys()) + + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('ZippedPluginPP', plugins_pp.keys()) + + finally: + sys.path.remove(str(zip_path)) + os.remove(zip_path) + importlib.invalidate_caches() # reset the import caches + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py new file mode 100644 index 0000000..3778d17 --- /dev/null +++ b/test/test_post_hooks.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import get_params, is_download_test, try_rm +import yt_dlp.YoutubeDL # isort: split +from yt_dlp.utils import DownloadError + + +class YoutubeDL(yt_dlp.YoutubeDL): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.to_stderr = self.to_screen + + +TEST_ID = 'gr51aVj-mLg' +EXPECTED_NAME = 'gr51aVj-mLg' + + +@is_download_test +class TestPostHooks(unittest.TestCase): + def setUp(self): + self.stored_name_1 = None + self.stored_name_2 = None + self.params = get_params({ + 'skip_download': False, + 'writeinfojson': False, + 'quiet': True, + 'verbose': False, + 'cachedir': False, + }) + self.files = [] + + def test_post_hooks(self): + self.params['post_hooks'] = [self.hook_one, self.hook_two] + ydl = YoutubeDL(self.params) + ydl.download([TEST_ID]) + self.assertEqual(self.stored_name_1, EXPECTED_NAME, 'Not the expected name from hook 1') + self.assertEqual(self.stored_name_2, EXPECTED_NAME, 'Not the expected name from hook 2') + + def test_post_hook_exception(self): + self.params['post_hooks'] = [self.hook_three] + ydl = YoutubeDL(self.params) + self.assertRaises(DownloadError, ydl.download, [TEST_ID]) + + def hook_one(self, filename): + self.stored_name_1, _ = os.path.splitext(os.path.basename(filename)) + self.files.append(filename) + + def hook_two(self, filename): + self.stored_name_2, _ = os.path.splitext(os.path.basename(filename)) + self.files.append(filename) + + def hook_three(self, filename): + self.files.append(filename) + raise Exception('Test exception for \'%s\'' % filename) + + def tearDown(self): + for f in 
self.files: + try_rm(f) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py new file mode 100644 index 0000000..52e5587 --- /dev/null +++ b/test/test_postprocessors.py @@ -0,0 +1,579 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from yt_dlp import YoutubeDL +from yt_dlp.compat import compat_shlex_quote +from yt_dlp.postprocessor import ( + ExecPP, + FFmpegThumbnailsConvertorPP, + MetadataFromFieldPP, + MetadataParserPP, + ModifyChaptersPP, + SponsorBlockPP, +) + + +class TestMetadataFromField(unittest.TestCase): + + def test_format_to_regex(self): + self.assertEqual( + MetadataParserPP.format_to_regex('%(title)s - %(artist)s'), + r'(?P<title>.+)\ \-\ (?P<artist>.+)') + self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)') + + def test_field_to_template(self): + self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s') + self.assertEqual(MetadataParserPP.field_to_template('1'), '1') + self.assertEqual(MetadataParserPP.field_to_template('foo bar'), 'foo bar') + self.assertEqual(MetadataParserPP.field_to_template(' literal'), ' literal') + + def test_metadatafromfield(self): + self.assertEqual( + MetadataFromFieldPP.to_action('%(title)s \\: %(artist)s:%(title)s : %(artist)s'), + (MetadataParserPP.Actions.INTERPRET, '%(title)s : %(artist)s', '%(title)s : %(artist)s')) + + +class TestConvertThumbnail(unittest.TestCase): + def test_escaping(self): + pp = FFmpegThumbnailsConvertorPP() + if not pp.available: + print('Skipping: ffmpeg not found') + return + + file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}' + tests = (('webp', 'png'), ('png', 'jpg')) + + for inp, out in tests: + out_file = file.format(out) + if os.path.exists(out_file): + os.remove(out_file) + pp.convert_thumbnail(file.format(inp), out) + assert os.path.exists(out_file) + + for _, out in tests: + os.remove(file.format(out)) + + +class TestExec(unittest.TestCase): + def test_parse_cmd(self): + pp = ExecPP(YoutubeDL(), '') + info = {'filepath': 'file name'} + cmd = 'echo %s' % compat_shlex_quote(info['filepath']) + + self.assertEqual(pp.parse_cmd('echo', info), cmd) + self.assertEqual(pp.parse_cmd('echo {}', info), cmd) + self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd) + + +class TestModifyChaptersPP(unittest.TestCase): + def setUp(self): + self._pp = ModifyChaptersPP(YoutubeDL()) + + @staticmethod + def _sponsor_chapter(start, end, cat, remove=False, title=None): + if title is None: + title = SponsorBlockPP.CATEGORIES[cat] + return { + 'start_time': start, + 'end_time': end, + '_categories': [(cat, start, end, title)], + **({'remove': True} if remove else {}), + } + + @staticmethod + def _chapter(start, end, title=None, remove=False): + c = {'start_time': start, 'end_time': end} + if title is not None: + c['title'] = title + if remove: + c['remove'] = True + return c + + def _chapters(self, ends, titles): + self.assertEqual(len(ends), len(titles)) + start = 0 + chapters = [] + for e, t in zip(ends, titles): + chapters.append(self._chapter(start, e, t)) + start = e + return chapters + + def _remove_marked_arrange_sponsors_test_impl( + self, chapters, expected_chapters, expected_removed): + actual_chapters, actual_removed = ( + self._pp._remove_marked_arrange_sponsors(chapters)) + for c in actual_removed: + c.pop('title', None) + c.pop('_categories', None) + 
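# reduce the actual chapters to just the fields under test + 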
actual_chapters = [{ + 'start_time': c['start_time'], + 'end_time': c['end_time'], + 'title': c['title'], + } for c in actual_chapters] + self.assertSequenceEqual(expected_chapters, actual_chapters) + self.assertSequenceEqual(expected_removed, actual_removed) + + def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self): + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(30, 40, 'preview'), + self._sponsor_chapter(50, 60, 'filler')] + expected = self._chapters( + [10, 20, 30, 40, 50, 60, 70], + ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap', + 'c', '[SponsorBlock]: Filler Tangent', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_SponsorBlockChapters(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'chapter', title='sb c1'), + self._sponsor_chapter(15, 16, 'chapter', title='sb c2'), + self._sponsor_chapter(30, 40, 'preview'), + self._sponsor_chapter(50, 60, 'filler')] + expected = self._chapters( + [10, 15, 16, 20, 30, 40, 50, 60, 70], + ['c', '[SponsorBlock]: sb c1', '[SponsorBlock]: sb c1, sb c2', '[SponsorBlock]: sb c1', + 'c', '[SponsorBlock]: Preview/Recap', + 'c', '[SponsorBlock]: Filler Tangent', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self): + chapters = self._chapters([120], ['c']) + [ + self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'), + self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'), + self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')] + expected = self._chapters( + [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Sponsor', + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion', + 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Sponsor', + '[SponsorBlock]: Unpaid/Self Promotion']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithCuts(self): + cuts = [self._chapter(10, 20, remove=True), + self._sponsor_chapter(30, 40, 'sponsor', remove=True), + self._chapter(50, 60, remove=True)] + chapters = self._chapters([70], ['c']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([40], ['c']), cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(30, 40, 'selfpromo', remove=True), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters([10, 20, 40, 50, 60], + ['c', '[SponsorBlock]: Sponsor', 'c', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 40, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self): + cuts = [self._sponsor_chapter(20, 
30, 'selfpromo', remove=True), + self._chapter(40, 50, remove=True)] + chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts + expected = self._chapters( + [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self): + cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)] + chapters = self._chapters([60], ['c']) + [ + self._sponsor_chapter(10, 20, 'intro'), + self._sponsor_chapter(30, 40, 'sponsor'), + self._sponsor_chapter(50, 60, 'outro'), + ] + cuts + expected = self._chapters( + [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(20, 30, 'selfpromo'), + self._sponsor_chapter(30, 40, 'interaction')] + expected = self._chapters( + [10, 20, 30, 40, 70], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(20, 30, 'interaction', remove=True), + self._chapter(30, 40, remove=True), + self._sponsor_chapter(40, 50, 'selfpromo', remove=True), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters([10, 20, 30, 40], + ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(20, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor'), + self._sponsor_chapter(20, 50, 'selfpromo'), + self._sponsor_chapter(40, 60, 'interaction')] + expected = self._chapters( + [10, 20, 30, 40, 50, 60, 70], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor', remove=True), + self._sponsor_chapter(20, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 60, 'interaction', remove=True)] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self): + chapters = self._chapters([170], ['c']) + [ + self._sponsor_chapter(0, 30, 'intro'), + self._sponsor_chapter(20, 50, 'sponsor'), + self._sponsor_chapter(40, 60, 'selfpromo'), + self._sponsor_chapter(70, 90, 'sponsor'), + self._sponsor_chapter(80, 100, 'sponsor'), + self._sponsor_chapter(90, 110, 'sponsor'), + self._sponsor_chapter(120, 140, 'selfpromo'), + self._sponsor_chapter(130, 160, 'interaction'), + 
self._sponsor_chapter(150, 170, 'outro')] + expected = self._chapters( + [20, 30, 40, 50, 60, 70, 110, 120, 130, 140, 150, 160, 170], + ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Intermission/Intro Animation, Sponsor', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', 'c', + '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', + '[SponsorBlock]: Interaction Reminder, Endcards/Credits', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self): + chapters = self._chapters([170], ['c']) + [ + self._chapter(0, 30, remove=True), + self._sponsor_chapter(20, 50, 'sponsor', remove=True), + self._chapter(40, 60, remove=True), + self._sponsor_chapter(70, 90, 'sponsor', remove=True), + self._chapter(80, 100, remove=True), + self._chapter(90, 110, remove=True), + self._sponsor_chapter(120, 140, 'sponsor', remove=True), + self._sponsor_chapter(130, 160, 'selfpromo', remove=True), + self._chapter(150, 170, remove=True)] + expected_cuts = [self._chapter(0, 60, remove=True), + self._chapter(70, 110, remove=True), + self._chapter(120, 170, remove=True)] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([20], ['c']), expected_cuts) + + def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self): + chapters = self._chapters([60], ['c']) + [ + self._sponsor_chapter(10, 60, 'sponsor'), + self._sponsor_chapter(10, 40, 'intro'), + self._sponsor_chapter(30, 50, 'interaction'), + self._sponsor_chapter(30, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 50, 'interaction'), + self._sponsor_chapter(50, 60, 'outro')] + expected = self._chapters( + [10, 30, 40], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor'), + self._sponsor_chapter(20, 50, 'interaction'), + self._sponsor_chapter(30, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 60, 'sponsor'), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters( + [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 60, 'sponsor'), + self._sponsor_chapter(20, 60, 'interaction'), + self._sponsor_chapter(30, 50, 'selfpromo', remove=True)] + expected = self._chapters( + [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self): + chapters = self._chapters([200], ['c']) + [ + self._sponsor_chapter(10, 40, 'sponsor'), + 
self._sponsor_chapter(10, 30, 'intro'), + self._chapter(20, 30, remove=True), + self._sponsor_chapter(30, 40, 'selfpromo'), + self._sponsor_chapter(50, 70, 'sponsor'), + self._sponsor_chapter(60, 80, 'interaction'), + self._chapter(70, 80, remove=True), + self._sponsor_chapter(70, 90, 'sponsor'), + self._sponsor_chapter(80, 100, 'interaction'), + self._sponsor_chapter(120, 170, 'selfpromo'), + self._sponsor_chapter(130, 180, 'outro'), + self._chapter(140, 150, remove=True), + self._chapter(150, 160, remove=True)] + expected = self._chapters( + [10, 20, 30, 40, 50, 70, 80, 100, 110, 130, 140, 160], + ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', 'c', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion, Endcards/Credits', '[SponsorBlock]: Endcards/Credits', 'c']) + expected_cuts = [self._chapter(20, 30, remove=True), + self._chapter(70, 80, remove=True), + self._chapter(140, 160, remove=True)] + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts) + + def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self): + chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + + [self._sponsor_chapter(10, 90, 'sponsor')]) + expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self): + cuts = [self._chapter(10, 90, remove=True)] + chapters = self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + cuts + expected = self._chapters([10, 20], ['c1', 'c5']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self): + chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(20, 30, 'sponsor'), + self._sponsor_chapter(50, 70, 'selfpromo')]) + expected = self._chapters([10, 20, 30, 40, 50, 70, 80], + ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3', + '[SponsorBlock]: Unpaid/Self Promotion', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthers(self): + cuts = [self._chapter(20, 30, remove=True), self._chapter(50, 70, remove=True)] + chapters = self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 30, 40, 50], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self): + chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(10, 30, 'music_offtopic')]) + expected = self._chapters( + [10, 30, 40, 50, 60], + ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self): + cuts = [self._chapter(10, 30, remove=True)] + chapters = self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, 
expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self): + chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(20, 30, 'sponsor')]) + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self): + cuts = [self._chapter(20, 30, remove=True)] + chapters = self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self): + chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(20, 30, 'sponsor')]) + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self): + cuts = [self._chapter(20, 30, remove=True)] + chapters = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self): + chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(10, 30, 'sponsor')]) + expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self): + cuts = [self._chapter(10, 30, remove=True)] + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 20], ['c1', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self): + chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')]) + expected = self._chapters( + [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self): + cuts = [self._chapter(0, 10, remove=True), self._chapter(50, 60, remove=True)] + chapters = self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self): + chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')]) + expected = self._chapters( + [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(self): + cuts = [self._chapter(0, 20, remove=True), self._chapter(30, 50, remove=True)] + chapters = self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) 
+ cuts + expected = self._chapters([10], ['c2']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_EverythingSponsored(self): + chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')]) + expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_EverythingCut(self): + cuts = [self._chapter(0, 20, remove=True), self._chapter(20, 40, remove=True)] + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, [], [self._chapter(0, 40, remove=True)]) + + def test_remove_marked_arrange_sponsors_TinyChaptersInTheOriginalArePreserved(self): + chapters = self._chapters([0.1, 0.2, 0.3, 0.4], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) + + def test_remove_marked_arrange_sponsors_TinySponsorsAreIgnored(self): + chapters = [self._sponsor_chapter(0, 0.1, 'intro'), self._chapter(0.1, 0.2, 'c1'), + self._sponsor_chapter(0.2, 0.3, 'sponsor'), self._chapter(0.3, 0.4, 'c2'), + self._sponsor_chapter(0.4, 0.5, 'outro')] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([0.3, 0.5], ['c1', 'c2']), []) + + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(self): + cuts = [self._chapter(1.5, 2.5, remove=True)] + chapters = self._chapters([2, 3, 3.5], ['c1', 'c2', 'c3']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts) + + def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self): + cuts = [self._chapter(0.5, 2, remove=True)] + chapters = self._chapters([2], ['c']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([0.5], ['c']), cuts) + + def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self): + cuts = [self._chapter(0.5, 2, remove=True)] + chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2.5], ['c2']), cuts) + + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): + chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [ + self._sponsor_chapter(1.5, 2.5, 'sponsor')] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) + + def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): + chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [ + self._sponsor_chapter(1, 3, 'sponsor'), + self._sponsor_chapter(2.5, 4, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1, 3, 4, 5], [ + 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) + + def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self): + chapters = self._chapters([4], ['c']) + [ + self._sponsor_chapter(1.5, 2, 'sponsor'), + self._sponsor_chapter(2, 4, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), []) + + def 
test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self): + self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s' + chapters = self._chapters([10], ['c']) + [ + self._sponsor_chapter(2, 8, 'sponsor'), + self._sponsor_chapter(4, 6, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2, 4, 6, 8, 10], [ + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Sponsor', 'c' + ]), []) + + def test_make_concat_opts_CommonCase(self): + sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +outpoint 1.000000 +file 'file:test' +inpoint 2.000000 +outpoint 10.000000 +file 'file:test' +inpoint 20.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 30) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_make_concat_opts_NoZeroDurationChunkAtVideoStart(self): + sponsor_chapters = [self._chapter(0, 1, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +inpoint 1.000000 +outpoint 10.000000 +file 'file:test' +inpoint 20.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 30) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_make_concat_opts_NoZeroDurationChunkAtVideoEnd(self): + sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +outpoint 1.000000 +file 'file:test' +inpoint 2.000000 +outpoint 10.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 20) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_quote_for_concat_RunsOfQuotes(self): + self.assertEqual( + r"'special '\'' '\'\''characters'\'\'\''galore'", + self._pp._quote_for_ffmpeg("special ' ''characters'''galore")) + + def test_quote_for_concat_QuotesAtStart(self): + self.assertEqual( + r"\'\'\''special '\'' characters '\'' galore'", + self._pp._quote_for_ffmpeg("'''special ' characters ' galore")) + + def test_quote_for_concat_QuotesAtEnd(self): + self.assertEqual( + r"'special '\'' characters '\'' galore'\'\'\'", + self._pp._quote_for_ffmpeg("special ' characters ' galore'''")) diff --git a/test/test_socks.py b/test/test_socks.py new file mode 100644 index 0000000..cb22b61 --- /dev/null +++ b/test/test_socks.py @@ -0,0 +1,477 @@ +#!/usr/bin/env python3 +# Allow direct execution +import os +import sys +import threading +import unittest + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import abc +import contextlib +import enum +import functools +import http.server +import json +import random +import socket +import struct +import time +from socketserver import ( + BaseRequestHandler, + StreamRequestHandler, + ThreadingTCPServer, +) + +from test.helper import http_server_port, verify_address_availability +from yt_dlp.networking import Request +from yt_dlp.networking.exceptions import ProxyError, TransportError +from yt_dlp.socks import ( + SOCKS4_REPLY_VERSION, + SOCKS4_VERSION, + SOCKS5_USER_AUTH_SUCCESS, + SOCKS5_USER_AUTH_VERSION, + SOCKS5_VERSION, + Socks5AddressType, + Socks5Auth, +) + +SOCKS5_USER_AUTH_FAILURE = 0x1 + + +class Socks4CD(enum.IntEnum): + REQUEST_GRANTED = 90 + REQUEST_REJECTED_OR_FAILED = 91 + REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD = 92 + REQUEST_REJECTED_DIFFERENT_USERID = 93 + 
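+# SOCKS5 reply codes, as defined in RFC 1928 section 6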
+ +class Socks5Reply(enum.IntEnum): + SUCCEEDED = 0x0 + GENERAL_FAILURE = 0x1 + CONNECTION_NOT_ALLOWED = 0x2 + NETWORK_UNREACHABLE = 0x3 + HOST_UNREACHABLE = 0x4 + CONNECTION_REFUSED = 0x5 + TTL_EXPIRED = 0x6 + COMMAND_NOT_SUPPORTED = 0x7 + ADDRESS_TYPE_NOT_SUPPORTED = 0x8 + + +class SocksTestRequestHandler(BaseRequestHandler): + + def __init__(self, *args, socks_info=None, **kwargs): + self.socks_info = socks_info + super().__init__(*args, **kwargs) + + +class SocksProxyHandler(BaseRequestHandler): + def __init__(self, request_handler_class, socks_server_kwargs, *args, **kwargs): + self.socks_kwargs = socks_server_kwargs or {} + self.request_handler_class = request_handler_class + super().__init__(*args, **kwargs) + + +class Socks5ProxyHandler(StreamRequestHandler, SocksProxyHandler): + + # SOCKS5 protocol https://tools.ietf.org/html/rfc1928 + # SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929 + + def handle(self): + sleep = self.socks_kwargs.get('sleep') + if sleep: + time.sleep(sleep) + version, nmethods = self.connection.recv(2) + assert version == SOCKS5_VERSION + methods = list(self.connection.recv(nmethods)) + + auth = self.socks_kwargs.get('auth') + + if auth is not None and Socks5Auth.AUTH_USER_PASS not in methods: + self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE)) + self.server.close_request(self.request) + return + + elif Socks5Auth.AUTH_USER_PASS in methods: + self.connection.sendall(struct.pack("!BB", SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS)) + + _, user_len = struct.unpack('!BB', self.connection.recv(2)) + username = self.connection.recv(user_len).decode() + pass_len = ord(self.connection.recv(1)) + password = self.connection.recv(pass_len).decode() + + if username == auth[0] and password == auth[1]: + self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_SUCCESS)) + else: + self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_FAILURE)) + self.server.close_request(self.request) + return + + elif Socks5Auth.AUTH_NONE in methods: + self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NONE)) + else: + self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE)) + self.server.close_request(self.request) + return + + version, command, _, address_type = struct.unpack('!BBBB', self.connection.recv(4)) + socks_info = { + 'version': version, + 'auth_methods': methods, + 'command': command, + 'client_address': self.client_address, + 'ipv4_address': None, + 'domain_address': None, + 'ipv6_address': None, + } + if address_type == Socks5AddressType.ATYP_IPV4: + socks_info['ipv4_address'] = socket.inet_ntoa(self.connection.recv(4)) + elif address_type == Socks5AddressType.ATYP_DOMAINNAME: + socks_info['domain_address'] = self.connection.recv(ord(self.connection.recv(1))).decode() + elif address_type == Socks5AddressType.ATYP_IPV6: + socks_info['ipv6_address'] = socket.inet_ntop(socket.AF_INET6, self.connection.recv(16)) + else: + self.server.close_request(self.request) + + socks_info['port'] = struct.unpack('!H', self.connection.recv(2))[0] + + # dummy response, the returned IP is just a placeholder + self.connection.sendall(struct.pack( + '!BBBBIH', SOCKS5_VERSION, self.socks_kwargs.get('reply', Socks5Reply.SUCCEEDED), 0x0, 0x1, 0x7f000001, 40000)) + + self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info) + + +class Socks4ProxyHandler(StreamRequestHandler, 
SocksProxyHandler): + + # SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol + # SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol + + def _read_until_null(self): + return b''.join(iter(functools.partial(self.connection.recv, 1), b'\x00')) + + def handle(self): + sleep = self.socks_kwargs.get('sleep') + if sleep: + time.sleep(sleep) + socks_info = { + 'version': SOCKS4_VERSION, + 'command': None, + 'client_address': self.client_address, + 'ipv4_address': None, + 'port': None, + 'domain_address': None, + } + version, command, dest_port, dest_ip = struct.unpack('!BBHI', self.connection.recv(8)) + socks_info['port'] = dest_port + socks_info['command'] = command + if version != SOCKS4_VERSION: + self.server.close_request(self.request) + return + use_remote_dns = False + if 0x0 < dest_ip <= 0xFF: + use_remote_dns = True + else: + socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack("!I", dest_ip)) + + user_id = self._read_until_null().decode() + if user_id != (self.socks_kwargs.get('user_id') or ''): + self.connection.sendall(struct.pack( + '!BBHI', SOCKS4_REPLY_VERSION, Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID, 0x00, 0x00000000)) + self.server.close_request(self.request) + return + + if use_remote_dns: + socks_info['domain_address'] = self._read_until_null().decode() + + # dummy response, the returned IP is just a placeholder + self.connection.sendall( + struct.pack( + '!BBHI', SOCKS4_REPLY_VERSION, + self.socks_kwargs.get('cd_reply', Socks4CD.REQUEST_GRANTED), 40000, 0x7f000001)) + + self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info) + + +class IPv6ThreadingTCPServer(ThreadingTCPServer): + address_family = socket.AF_INET6 + + +class SocksHTTPTestRequestHandler(http.server.BaseHTTPRequestHandler, SocksTestRequestHandler): + def do_GET(self): + if self.path == '/socks_info': + payload = json.dumps(self.socks_info.copy()) + self.send_response(200) + self.send_header('Content-Type', 'application/json; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload.encode()) + + +class SocksWebSocketTestRequestHandler(SocksTestRequestHandler): + def handle(self): + import websockets.sync.server + protocol = websockets.ServerProtocol() + connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0) + connection.handshake() + connection.send(json.dumps(self.socks_info)) + connection.close() + + +@contextlib.contextmanager +def socks_server(socks_server_class, request_handler, bind_ip=None, **socks_server_kwargs): + server = server_thread = None + try: + bind_address = bind_ip or '127.0.0.1' + server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer + server = server_type( + (bind_address, 0), functools.partial(socks_server_class, request_handler, socks_server_kwargs)) + server_port = http_server_port(server) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + if '.' 
not in bind_address: + yield f'[{bind_address}]:{server_port}' + else: + yield f'{bind_address}:{server_port}' + finally: + server.shutdown() + server.server_close() + server_thread.join(2.0) + + +class SocksProxyTestContext(abc.ABC): + REQUEST_HANDLER_CLASS = None + + def socks_server(self, server_class, *args, **kwargs): + return socks_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs) + + @abc.abstractmethod + def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict: + """return a dict of socks_info""" + + +class HTTPSocksTestProxyContext(SocksProxyTestContext): + REQUEST_HANDLER_CLASS = SocksHTTPTestRequestHandler + + def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/socks_info', **req_kwargs) + handler.validate(request) + return json.loads(handler.send(request).read().decode()) + + +class WebSocketSocksTestProxyContext(SocksProxyTestContext): + REQUEST_HANDLER_CLASS = SocksWebSocketTestRequestHandler + + def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'ws://{target_domain or "127.0.0.1"}:{target_port or "40000"}', **req_kwargs) + handler.validate(request) + ws = handler.send(request) + ws.send('socks_info') + socks_info = ws.recv() + ws.close() + return json.loads(socks_info) + + +CTX_MAP = { + 'http': HTTPSocksTestProxyContext, + 'ws': WebSocketSocksTestProxyContext, +} + + +@pytest.fixture(scope='module') +def ctx(request): + return CTX_MAP[request.param]() + + +class TestSocks4Proxy: + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks4_no_auth(self, handler, ctx): + with handler() as rh: + with ctx.socks_server(Socks4ProxyHandler) as server_address: + response = ctx.socks_info_request( + rh, proxies={'all': f'socks4://{server_address}'}) + assert response['version'] == 4 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks4_auth(self, handler, ctx): + with handler() as rh: + with ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address: + with pytest.raises(ProxyError): + ctx.socks_info_request(rh, proxies={'all': f'socks4://{server_address}'}) + response = ctx.socks_info_request( + rh, proxies={'all': f'socks4://user:@{server_address}'}) + assert response['version'] == 4 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks4a_ipv4_target(self, handler, ctx): + with ctx.socks_server(Socks4ProxyHandler) as server_address: + with handler(proxies={'all': f'socks4a://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='127.0.0.1') + assert response['version'] == 4 + assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1') + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks4a_domain_target(self, handler, ctx): + with ctx.socks_server(Socks4ProxyHandler) as server_address: + with handler(proxies={'all': f'socks4a://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='localhost') + assert response['version'] == 4 + assert response['ipv4_address'] is None + assert response['domain_address'] 
== 'localhost' + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_ipv4_client_source_address(self, handler, ctx): + with ctx.socks_server(Socks4ProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={'all': f'socks4://{server_address}'}, + source_address=source_address) as rh: + response = ctx.socks_info_request(rh) + assert response['client_address'][0] == source_address + assert response['version'] == 4 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + @pytest.mark.parametrize('reply_code', [ + Socks4CD.REQUEST_REJECTED_OR_FAILED, + Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD, + Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID, + ]) + def test_socks4_errors(self, handler, ctx, reply_code): + with ctx.socks_server(Socks4ProxyHandler, cd_reply=reply_code) as server_address: + with handler(proxies={'all': f'socks4://{server_address}'}) as rh: + with pytest.raises(ProxyError): + ctx.socks_info_request(rh) + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_ipv6_socks4_proxy(self, handler, ctx): + with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address: + with handler(proxies={'all': f'socks4://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='127.0.0.1') + assert response['client_address'][0] == '::1' + assert response['ipv4_address'] == '127.0.0.1' + assert response['version'] == 4 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_timeout(self, handler, ctx): + with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address: + with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh: + with pytest.raises(TransportError): + ctx.socks_info_request(rh) + + +class TestSocks5Proxy: + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks5_no_auth(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler) as server_address: + with handler(proxies={'all': f'socks5://{server_address}'}) as rh: + response = ctx.socks_info_request(rh) + assert response['auth_methods'] == [0x0] + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks5_user_pass(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address: + with handler() as rh: + with pytest.raises(ProxyError): + ctx.socks_info_request(rh, proxies={'all': f'socks5://{server_address}'}) + + response = ctx.socks_info_request( + rh, proxies={'all': f'socks5://test:testpass@{server_address}'}) + + assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS] + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks5_ipv4_target(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler) as server_address: + with handler(proxies={'all': f'socks5://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='127.0.0.1') + assert 
response['ipv4_address'] == '127.0.0.1' + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks5_domain_target(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler) as server_address: + with handler(proxies={'all': f'socks5://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='localhost') + assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1') + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks5h_domain_target(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler) as server_address: + with handler(proxies={'all': f'socks5h://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='localhost') + assert response['ipv4_address'] is None + assert response['domain_address'] == 'localhost' + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks5h_ip_target(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler) as server_address: + with handler(proxies={'all': f'socks5h://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='127.0.0.1') + assert response['ipv4_address'] == '127.0.0.1' + assert response['domain_address'] is None + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_socks5_ipv6_destination(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler) as server_address: + with handler(proxies={'all': f'socks5://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='[::1]') + assert response['ipv6_address'] == '::1' + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_ipv6_socks5_proxy(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address: + with handler(proxies={'all': f'socks5://{server_address}'}) as rh: + response = ctx.socks_info_request(rh, target_domain='127.0.0.1') + assert response['client_address'][0] == '::1' + assert response['ipv4_address'] == '127.0.0.1' + assert response['version'] == 5 + + # XXX: is there any feasible way of testing IPv6 source addresses? + # Same would go for non-proxy source_address test... 
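+
+    # A hedged, illustrative sketch (not part of the upstream suite): the raw
+    # RFC 1929 username/password sub-negotiation payload that
+    # Socks5ProxyHandler.handle() above parses field by field; the helper
+    # name is hypothetical.
+    @staticmethod
+    def _example_socks5_userpass_payload(username='test', password='testpass'):
+        user, pw = username.encode(), password.encode()
+        # auth version byte, then length-prefixed username and password
+        return (struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, len(user)) + user
+                + struct.pack('!B', len(pw)) + pw)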
+ @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + def test_ipv4_client_source_address(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh: + response = ctx.socks_info_request(rh) + assert response['client_address'][0] == source_address + assert response['version'] == 5 + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True) + @pytest.mark.parametrize('reply_code', [ + Socks5Reply.GENERAL_FAILURE, + Socks5Reply.CONNECTION_NOT_ALLOWED, + Socks5Reply.NETWORK_UNREACHABLE, + Socks5Reply.HOST_UNREACHABLE, + Socks5Reply.CONNECTION_REFUSED, + Socks5Reply.TTL_EXPIRED, + Socks5Reply.COMMAND_NOT_SUPPORTED, + Socks5Reply.ADDRESS_TYPE_NOT_SUPPORTED, + ]) + def test_socks5_errors(self, handler, ctx, reply_code): + with ctx.socks_server(Socks5ProxyHandler, reply=reply_code) as server_address: + with handler(proxies={'all': f'socks5://{server_address}'}) as rh: + with pytest.raises(ProxyError): + ctx.socks_info_request(rh) + + @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Websockets', 'ws')], indirect=True) + def test_timeout(self, handler, ctx): + with ctx.socks_server(Socks5ProxyHandler, sleep=2) as server_address: + with handler(proxies={'all': f'socks5://{server_address}'}, timeout=1) as rh: + with pytest.raises(TransportError): + ctx.socks_info_request(rh) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_subtitles.py b/test/test_subtitles.py new file mode 100644 index 0000000..5736289 --- /dev/null +++ b/test/test_subtitles.py @@ -0,0 +1,452 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import FakeYDL, is_download_test, md5 +from yt_dlp.extractor import ( + NPOIE, + NRKTVIE, + PBSIE, + CeskaTelevizeIE, + ComedyCentralIE, + DailymotionIE, + DemocracynowIE, + LyndaIE, + RaiPlayIE, + RTVEALaCartaIE, + TedTalkIE, + ThePlatformFeedIE, + ThePlatformIE, + VikiIE, + VimeoIE, + WallaIE, + YoutubeIE, +) + + +@is_download_test +class BaseTestSubtitles(unittest.TestCase): + url = None + IE = None + + def setUp(self): + self.DL = FakeYDL() + self.ie = self.IE() + self.DL.add_info_extractor(self.ie) + if not self.IE.working(): + print('Skipping: %s marked as not _WORKING' % self.IE.ie_key()) + self.skipTest('IE marked as not _WORKING') + + def getInfoDict(self): + info_dict = self.DL.extract_info(self.url, download=False) + return info_dict + + def getSubtitles(self): + info_dict = self.getInfoDict() + subtitles = info_dict['requested_subtitles'] + if not subtitles: + return subtitles + for sub_info in subtitles.values(): + if sub_info.get('data') is None: + uf = self.DL.urlopen(sub_info['url']) + sub_info['data'] = uf.read().decode() + return {l: sub_info['data'] for l, sub_info in subtitles.items()} + + +@is_download_test +class TestYoutubeSubtitles(BaseTestSubtitles): + # Available subtitles for QRS8MkLhQmM: + # Language formats + # ru vtt, ttml, srv3, srv2, srv1, json3 + # fr vtt, ttml, srv3, srv2, srv1, json3 + # en vtt, ttml, srv3, srv2, srv1, json3 + # nl vtt, ttml, srv3, srv2, srv1, json3 + # de vtt, ttml, srv3, srv2, srv1, json3 + # ko vtt, 
ttml, srv3, srv2, srv1, json3 + # it vtt, ttml, srv3, srv2, srv1, json3 + # zh-Hant vtt, ttml, srv3, srv2, srv1, json3 + # hi vtt, ttml, srv3, srv2, srv1, json3 + # pt-BR vtt, ttml, srv3, srv2, srv1, json3 + # es-MX vtt, ttml, srv3, srv2, srv1, json3 + # ja vtt, ttml, srv3, srv2, srv1, json3 + # pl vtt, ttml, srv3, srv2, srv1, json3 + url = 'QRS8MkLhQmM' + IE = YoutubeIE + + def test_youtube_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(len(subtitles.keys()), 13) + self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') + self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9') + for lang in ['fr', 'de']: + self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + + def _test_subtitles_format(self, fmt, md5_hash, lang='en'): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = fmt + subtitles = self.getSubtitles() + self.assertEqual(md5(subtitles[lang]), md5_hash) + + def test_youtube_subtitles_ttml_format(self): + self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2') + + def test_youtube_subtitles_vtt_format(self): + self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d') + + def test_youtube_subtitles_json3_format(self): + self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b') + + def _test_automatic_captions(self, url, lang): + self.url = url + self.DL.params['writeautomaticsub'] = True + self.DL.params['subtitleslangs'] = [lang] + subtitles = self.getSubtitles() + self.assertTrue(subtitles[lang] is not None) + + def test_youtube_automatic_captions(self): + # Available automatic captions for 8YoUxe5ncPo: + # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3) + # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr, + # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da, + # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv, + # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy, + # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur, + # mt, ms, mr, ug, ta, my, af, sw, is, am, + # *it*, iw, sv, ar, + # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi, + # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl, + # ky, sd + # ... + self._test_automatic_captions('8YoUxe5ncPo', 'it') + + @unittest.skip('Video unavailable') + def test_youtube_translated_subtitles(self): + # This video has a subtitles track, which can be translated (#4555) + self._test_automatic_captions('Ky9eprVWzlI', 'it') + + def test_youtube_nosubtitles(self): + self.DL.expect_warning('video doesn\'t have subtitles') + # Available automatic captions for 8YoUxe5ncPo: + # ... 
+ # 8YoUxe5ncPo has no subtitles + self.url = '8YoUxe5ncPo' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertFalse(subtitles) + + +@is_download_test +class TestDailymotionSubtitles(BaseTestSubtitles): + url = 'http://www.dailymotion.com/video/xczg00' + IE = DailymotionIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) >= 6) + self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') + self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') + for lang in ['es', 'fr', 'de']: + self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + + def test_nosubtitles(self): + self.DL.expect_warning('video doesn\'t have subtitles') + self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertFalse(subtitles) + + +@is_download_test +@unittest.skip('IE broken') +class TestTedSubtitles(BaseTestSubtitles): + url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' + IE = TedTalkIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertTrue(len(subtitles.keys()) >= 28) + self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') + self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') + for lang in ['es', 'fr', 'de']: + self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + + +@is_download_test +class TestVimeoSubtitles(BaseTestSubtitles): + url = 'http://vimeo.com/76979871' + IE = VimeoIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'}) + self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1') + self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac') + + def test_nosubtitles(self): + self.DL.expect_warning('video doesn\'t have subtitles') + self.url = 'http://vimeo.com/68093876' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertFalse(subtitles) + + +@is_download_test +@unittest.skip('IE broken') +class TestWallaSubtitles(BaseTestSubtitles): + url = 'http://vod.walla.co.il/movie/2705958/the-yes-men' + IE = WallaIE + + def test_allsubtitles(self): + self.DL.expect_warning('Automatic Captions not supported by this server') + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'heb'}) + self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920') + + def test_nosubtitles(self): + self.DL.expect_warning('video doesn\'t have subtitles') + self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertFalse(subtitles) + + +@is_download_test +@unittest.skip('IE broken') +class TestCeskaTelevizeSubtitles(BaseTestSubtitles): + url = 
'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' + IE = CeskaTelevizeIE + + def test_allsubtitles(self): + self.DL.expect_warning('Automatic Captions not supported by this server') + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'cs'}) + self.assertTrue(len(subtitles['cs']) > 20000) + + def test_nosubtitles(self): + self.DL.expect_warning('video doesn\'t have subtitles') + self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertFalse(subtitles) + + +@is_download_test +@unittest.skip('IE broken') +class TestLyndaSubtitles(BaseTestSubtitles): + url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' + IE = LyndaIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') + + +@is_download_test +@unittest.skip('IE broken') +class TestNPOSubtitles(BaseTestSubtitles): + url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860' + IE = NPOIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'nl'}) + self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') + + +@is_download_test +@unittest.skip('IE broken') +class TestMTVSubtitles(BaseTestSubtitles): + url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans' + IE = ComedyCentralIE + + def getInfoDict(self): + return super().getInfoDict()['entries'][0] + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961') + + +@is_download_test +class TestNRKSubtitles(BaseTestSubtitles): + url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1' + IE = NRKTVIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'nb-ttv'}) + self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149') + + +@is_download_test +class TestRaiPlaySubtitles(BaseTestSubtitles): + IE = RaiPlayIE + + def test_subtitles_key(self): + self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'it'}) + self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') + + def test_subtitles_array_key(self): + self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'it'}) + 
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd') + + +@is_download_test +@unittest.skip('IE broken - DRM only') +class TestVikiSubtitles(BaseTestSubtitles): + url = 'http://www.viki.com/videos/1060846v-punch-episode-18' + IE = VikiIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a') + + +@is_download_test +class TestThePlatformSubtitles(BaseTestSubtitles): + # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/ + # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/) + url = 'theplatform:JFUjUE1_ehvq' + IE = ThePlatformIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') + + +@is_download_test +@unittest.skip('IE broken') +class TestThePlatformFeedSubtitles(BaseTestSubtitles): + url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207' + IE = ThePlatformFeedIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade') + + +@is_download_test +class TestRtveSubtitles(BaseTestSubtitles): + url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/' + IE = RTVEALaCartaIE + + def test_allsubtitles(self): + print('Skipping, only available from Spain') + return + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'es'}) + self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') + + +@is_download_test +class TestDemocracynowSubtitles(BaseTestSubtitles): + url = 'http://www.democracynow.org/shows/2015/7/3' + IE = DemocracynowIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045') + + def test_subtitles_in_page(self): + self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree' + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045') + + +@is_download_test +class TestPBSSubtitles(BaseTestSubtitles): + url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/' + IE = PBSIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), {'en'}) + + def test_subtitles_dfxp_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'dfxp' + subtitles = self.getSubtitles() + self.assertIn(md5(subtitles['en']), 
['643b034254cdc3768ff1e750b6b5873b']) + + def test_subtitles_vtt_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'vtt' + subtitles = self.getSubtitles() + self.assertIn( + md5(subtitles['en']), ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73']) + + def test_subtitles_srt_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'srt' + subtitles = self.getSubtitles() + self.assertIn(md5(subtitles['en']), ['2082c21b43759d9bf172931b2f2ca371']) + + def test_subtitles_sami_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'sami' + subtitles = self.getSubtitles() + self.assertIn(md5(subtitles['en']), ['4256b16ac7da6a6780fafd04294e85cd']) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_update.py b/test/test_update.py new file mode 100644 index 0000000..bc13956 --- /dev/null +++ b/test/test_update.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import FakeYDL, report_warning +from yt_dlp.update import UpdateInfo, Updater + + +# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES +TEST_UPDATE_SOURCES = { + 'stable': 'yt-dlp/yt-dlp', + 'nightly': 'yt-dlp/yt-dlp-nightly-builds', + 'master': 'yt-dlp/yt-dlp-master-builds', +} + +TEST_API_DATA = { + 'yt-dlp/yt-dlp/latest': { + 'tag_name': '2023.12.31', + 'target_commitish': 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb', + 'name': 'yt-dlp 2023.12.31', + 'body': 'BODY', + }, + 'yt-dlp/yt-dlp-nightly-builds/latest': { + 'tag_name': '2023.12.31.123456', + 'target_commitish': 'master', + 'name': 'yt-dlp nightly 2023.12.31.123456', + 'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/cccccccccccccccccccccccccccccccccccccccc', + }, + 'yt-dlp/yt-dlp-master-builds/latest': { + 'tag_name': '2023.12.31.987654', + 'target_commitish': 'master', + 'name': 'yt-dlp master 2023.12.31.987654', + 'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/dddddddddddddddddddddddddddddddddddddddd', + }, + 'yt-dlp/yt-dlp/tags/testing': { + 'tag_name': 'testing', + 'target_commitish': '9999999999999999999999999999999999999999', + 'name': 'testing', + 'body': 'BODY', + }, + 'fork/yt-dlp/latest': { + 'tag_name': '2050.12.31', + 'target_commitish': 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee', + 'name': '2050.12.31', + 'body': 'BODY', + }, + 'fork/yt-dlp/tags/pr0000': { + 'tag_name': 'pr0000', + 'target_commitish': 'ffffffffffffffffffffffffffffffffffffffff', + 'name': 'pr1234 2023.11.11.000000', + 'body': 'BODY', + }, + 'fork/yt-dlp/tags/pr1234': { + 'tag_name': 'pr1234', + 'target_commitish': '0000000000000000000000000000000000000000', + 'name': 'pr1234 2023.12.31.555555', + 'body': 'BODY', + }, + 'fork/yt-dlp/tags/pr9999': { + 'tag_name': 'pr9999', + 'target_commitish': '1111111111111111111111111111111111111111', + 'name': 'pr9999', + 'body': 'BODY', + }, + 'fork/yt-dlp-satellite/tags/pr987': { + 'tag_name': 'pr987', + 'target_commitish': 'master', + 'name': 'pr987', + 'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/2222222222222222222222222222222222222222', + }, +} + +TEST_LOCKFILE_COMMENT = '# This file is used for regulating self-update' + +TEST_LOCKFILE_V1 = r'''%s +lock 2022.08.18.36 .+ Python 3\.6 +lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 +lock 2023.11.16 win_x86_exe .+ 
Windows-(?:Vista|2008Server) +''' % TEST_LOCKFILE_COMMENT + +TEST_LOCKFILE_V2_TMPL = r'''%s +lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 +lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 +lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) +lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 +lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) +lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 +lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) +''' + +TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT + +TEST_LOCKFILE_ACTUAL = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_V1.rstrip('\n') + +TEST_LOCKFILE_FORK = r'''%s# Test if a fork blocks updates to non-numeric tags +lockV2 fork/yt-dlp pr0000 .+ Python 3.6 +lockV2 fork/yt-dlp pr1234 (?!win_x86_exe).+ Python 3\.7 +lockV2 fork/yt-dlp pr1234 win_x86_exe .+ Windows-(?:Vista|2008Server) +lockV2 fork/yt-dlp pr9999 .+ Python 3.11 +''' % TEST_LOCKFILE_ACTUAL + + +class FakeUpdater(Updater): + current_version = '2022.01.01' + current_commit = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + + _channel = 'stable' + _origin = 'yt-dlp/yt-dlp' + _update_sources = TEST_UPDATE_SOURCES + + def _download_update_spec(self, *args, **kwargs): + return TEST_LOCKFILE_ACTUAL + + def _call_api(self, tag): + tag = f'tags/{tag}' if tag != 'latest' else tag + return TEST_API_DATA[f'{self.requested_repo}/{tag}'] + + def _report_error(self, msg, *args, **kwargs): + report_warning(msg) + + +class TestUpdate(unittest.TestCase): + maxDiff = None + + def test_update_spec(self): + ydl = FakeYDL() + updater = FakeUpdater(ydl, 'stable') + + def test(lockfile, identifier, input_tag, expect_tag, exact=False, repo='yt-dlp/yt-dlp'): + updater._identifier = identifier + updater._exact = exact + updater.requested_repo = repo + result = updater._process_update_spec(lockfile, input_tag) + self.assertEqual( + result, expect_tag, + f'{identifier!r} requesting {repo}@{input_tag} (exact={exact}) ' + f'returned {result!r} instead of {expect_tag!r}') + + for lockfile in (TEST_LOCKFILE_V1, TEST_LOCKFILE_V2, TEST_LOCKFILE_ACTUAL, TEST_LOCKFILE_FORK): + # Normal operation + test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31') + test(lockfile, 'zip stable Python 3.12.0', '2023.12.31', '2023.12.31', exact=True) + # Python 3.6 --update should update only to its lock + test(lockfile, 'zip Python 3.6.0', '2023.11.16', '2022.08.18.36') + # --update-to an exact version later than the lock should return None + test(lockfile, 'zip stable Python 3.6.0', '2023.11.16', None, exact=True) + # Python 3.7 should be able to update to its lock + test(lockfile, 'zip Python 3.7.0', '2023.11.16', '2023.11.16') + test(lockfile, 'zip stable Python 3.7.1', '2023.11.16', '2023.11.16', exact=True) + # Non-win_x86_exe builds on py3.7 must be locked + test(lockfile, 'zip Python 3.7.1', '2023.12.31', '2023.11.16') + test(lockfile, 'zip stable Python 3.7.1', '2023.12.31', None, exact=True) + test( # Windows Vista w/ win_x86_exe must be locked + lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', + '2023.12.31', '2023.11.16') + test( # Windows 2008Server w/ win_x86_exe must be locked + lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-2008Server', + '2023.12.31', None, exact=True) + test( # Windows 7 w/ win_x86_exe py3.7 build should be able to update 
beyond lock + lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', + '2023.12.31', '2023.12.31') + test( # Windows 8.1 w/ '2008Server' in platform string should be able to update beyond lock + lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-post2008Server-6.2.9200', + '2023.12.31', '2023.12.31', exact=True) + + # Forks can block updates to non-numeric tags rather than lock + test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp') + test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp') + test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp') + test(TEST_LOCKFILE_FORK, 'zip Python 3.8.1', 'pr1234', 'pr1234', repo='fork/yt-dlp', exact=True) + test( + TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', + 'pr1234', None, repo='fork/yt-dlp') + test( + TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', + '2023.12.31', '2023.12.31', repo='fork/yt-dlp') + test(TEST_LOCKFILE_FORK, 'zip Python 3.11.2', 'pr9999', None, repo='fork/yt-dlp', exact=True) + test(TEST_LOCKFILE_FORK, 'zip stable Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp') + + def test_query_update(self): + ydl = FakeYDL() + + def test(target, expected, current_version=None, current_commit=None, identifier=None): + updater = FakeUpdater(ydl, target) + if current_version: + updater.current_version = current_version + if current_commit: + updater.current_commit = current_commit + updater._identifier = identifier or 'zip' + update_info = updater.query_update(_output=True) + self.assertDictEqual( + update_info.__dict__ if update_info else {}, expected.__dict__ if expected else {}) + + test('yt-dlp/yt-dlp@latest', UpdateInfo( + '2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40)) + test('yt-dlp/yt-dlp-nightly-builds@latest', UpdateInfo( + '2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40)) + test('yt-dlp/yt-dlp-master-builds@latest', UpdateInfo( + '2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40)) + test('fork/yt-dlp@latest', UpdateInfo( + '2050.12.31', version='2050.12.31', requested_version='2050.12.31', commit='e' * 40)) + test('fork/yt-dlp@pr0000', UpdateInfo( + 'pr0000', version='2023.11.11.000000', requested_version='2023.11.11.000000', commit='f' * 40)) + test('fork/yt-dlp@pr1234', UpdateInfo( + 'pr1234', version='2023.12.31.555555', requested_version='2023.12.31.555555', commit='0' * 40)) + test('fork/yt-dlp@pr9999', UpdateInfo( + 'pr9999', version=None, requested_version=None, commit='1' * 40)) + test('fork/yt-dlp-satellite@pr987', UpdateInfo( + 'pr987', version=None, requested_version=None, commit='2' * 40)) + test('yt-dlp/yt-dlp', None, current_version='2024.01.01') + test('stable', UpdateInfo( + '2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40)) + test('nightly', UpdateInfo( + '2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40)) + test('master', UpdateInfo( + '2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40)) + test('testing', None, current_commit='9' * 40) + test('testing', UpdateInfo('testing', commit='9' * 40)) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/test/test_utils.py b/test/test_utils.py new file mode 100644 index 0000000..a3073f0 --- /dev/null +++ b/test/test_utils.py @@ -0,0 +1,2457 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import re +import sys +import unittest +import warnings + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import contextlib +import io +import itertools +import json +import subprocess +import xml.etree.ElementTree + +from yt_dlp.compat import ( + compat_etree_fromstring, + compat_HTMLParseError, + compat_os_name, +) +from yt_dlp.utils import ( + Config, + DateRange, + ExtractorError, + InAdvancePagedList, + LazyList, + OnDemandPagedList, + Popen, + age_restricted, + args_to_str, + base_url, + caesar, + clean_html, + clean_podcast_url, + cli_bool_option, + cli_option, + cli_valueless_option, + date_from_str, + datetime_from_str, + detect_exe_version, + determine_ext, + determine_file_encoding, + dfxp2srt, + dict_get, + encode_base_n, + encode_compat_str, + encodeFilename, + expand_path, + extract_attributes, + extract_basic_auth, + find_xpath_attr, + fix_xml_ampersands, + float_or_none, + format_bytes, + get_compatible_ext, + get_element_by_attribute, + get_element_by_class, + get_element_html_by_attribute, + get_element_html_by_class, + get_element_text_and_html_by_tag, + get_elements_by_attribute, + get_elements_by_class, + get_elements_html_by_attribute, + get_elements_html_by_class, + get_elements_text_and_html_by_attribute, + int_or_none, + intlist_to_bytes, + iri_to_uri, + is_html, + js_to_json, + limit_length, + locked_file, + lowercase_escape, + match_str, + merge_dicts, + mimetype2ext, + month_by_name, + multipart_encode, + ohdave_rsa_encrypt, + orderedSet, + parse_age_limit, + parse_bitrate, + parse_codecs, + parse_count, + parse_dfxp_time_expr, + parse_duration, + parse_filesize, + parse_iso8601, + parse_qs, + parse_resolution, + pkcs1pad, + prepend_extension, + read_batch_urls, + remove_end, + remove_quotes, + remove_start, + render_table, + replace_extension, + rot47, + sanitize_filename, + sanitize_path, + sanitize_url, + shell_quote, + smuggle_url, + str_or_none, + str_to_int, + strip_jsonp, + strip_or_none, + subtitles_filename, + timeconvert, + traverse_obj, + try_call, + unescapeHTML, + unified_strdate, + unified_timestamp, + unsmuggle_url, + update_url_query, + uppercase_escape, + url_basename, + url_or_none, + urlencode_postdata, + urljoin, + urshift, + variadic, + version_tuple, + xpath_attr, + xpath_element, + xpath_text, + xpath_with_ns, +) +from yt_dlp.utils.networking import ( + HTTPHeaderDict, + escape_rfc3986, + normalize_url, + remove_dot_segments, +) + + +class TestUtil(unittest.TestCase): + def test_timeconvert(self): + self.assertTrue(timeconvert('') is None) + self.assertTrue(timeconvert('bougrg') is None) + + def test_sanitize_filename(self): + self.assertEqual(sanitize_filename(''), '') + self.assertEqual(sanitize_filename('abc'), 'abc') + self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e') + + self.assertEqual(sanitize_filename('123'), '123') + + self.assertEqual('abc⧸de', sanitize_filename('abc/de')) + self.assertFalse('/' in sanitize_filename('abc/de///')) + + self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False)) + self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False)) + self.assertEqual('yes no', sanitize_filename('yes? 
no', is_id=False)) + self.assertEqual('this - that', sanitize_filename('this: that', is_id=False)) + + self.assertEqual(sanitize_filename('AT&T'), 'AT&T') + aumlaut = 'ä' + self.assertEqual(sanitize_filename(aumlaut), aumlaut) + tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430' + self.assertEqual(sanitize_filename(tests), tests) + + self.assertEqual( + sanitize_filename('New World record at 0:12:34'), + 'New World record at 0_12_34') + + self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf') + + forbidden = '"\0\\/' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc)) + + def test_sanitize_filename_restricted(self): + self.assertEqual(sanitize_filename('abc', restricted=True), 'abc') + self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e') + + self.assertEqual(sanitize_filename('123', restricted=True), '123') + + self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True)) + self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True)) + + self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True)) + self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True)) + self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) + self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) + + tests = 'aäb\u4e2d\u56fd\u7684c' + self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c') + self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename + + forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) + + # Handle a common case more neatly + self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song') + self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech') + # .. 
but make sure the file name is never empty + self.assertTrue(sanitize_filename('-', restricted=True) != '') + self.assertTrue(sanitize_filename(':', restricted=True) != '') + + self.assertEqual(sanitize_filename( + 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True), + 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy') + + def test_sanitize_ids(self): + self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') + self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') + self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') + + def test_sanitize_path(self): + if sys.platform != 'win32': + return + + self.assertEqual(sanitize_path('abc'), 'abc') + self.assertEqual(sanitize_path('abc/def'), 'abc\\def') + self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') + self.assertEqual(sanitize_path('abc|def'), 'abc#def') + self.assertEqual(sanitize_path('<>:"|?*'), '#######') + self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def') + self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def') + + self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc') + self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc') + + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + + self.assertEqual( + sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'), + 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s') + + self.assertEqual( + sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'), + 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! 
(1987 Yamaha 700)-20141116.mp4.part') + self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#') + self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def') + self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#') + + self.assertEqual(sanitize_path('../abc'), '..\\abc') + self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc') + self.assertEqual(sanitize_path('./abc'), 'abc') + self.assertEqual(sanitize_path('./../abc'), '..\\abc') + + def test_sanitize_url(self): + self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') + self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') + self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') + self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') + self.assertEqual(sanitize_url('foo bar'), 'foo bar') + + def test_expand_path(self): + def env(var): + return f'%{var}%' if sys.platform == 'win32' else f'${var}' + + os.environ['yt_dlp_EXPATH_PATH'] = 'expanded' + self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded') + + old_home = os.environ.get('HOME') + test_str = R'C:\Documents and Settings\тест\Application Data' + try: + os.environ['HOME'] = test_str + self.assertEqual(expand_path(env('HOME')), os.getenv('HOME')) + self.assertEqual(expand_path('~'), os.getenv('HOME')) + self.assertEqual( + expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), + '%s/expanded' % os.getenv('HOME')) + finally: + os.environ['HOME'] = old_home or '' + + def test_prepend_extension(self): + self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') + self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') + self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') + self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp') + self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') + self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') + + def test_replace_extension(self): + self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp') + self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') + self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') + self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp') + self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') + self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + + def test_subtitles_filename(self): + self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt') + self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt') + self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt') + + def test_remove_start(self): + self.assertEqual(remove_start(None, 'A - '), None) + self.assertEqual(remove_start('A - B', 'A - '), 'B') + self.assertEqual(remove_start('B - A', 'A - '), 'B - A') + + def test_remove_end(self): + self.assertEqual(remove_end(None, ' - B'), None) + self.assertEqual(remove_end('A - B', ' - B'), 'A') + self.assertEqual(remove_end('B - A', ' - B'), 'B - A') + + def test_remove_quotes(self): + self.assertEqual(remove_quotes(None), None) + self.assertEqual(remove_quotes('"'), '"') + self.assertEqual(remove_quotes("'"), "'") + self.assertEqual(remove_quotes(';'), ';') + self.assertEqual(remove_quotes('";'), '";') + self.assertEqual(remove_quotes('""'), '') + self.assertEqual(remove_quotes('";"'), ';') + + def test_ordered_set(self): + 
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) + self.assertEqual(orderedSet([]), []) + self.assertEqual(orderedSet([1]), [1]) + # keep the list ordered + self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) + + def test_unescape_html(self): + self.assertEqual(unescapeHTML('%20;'), '%20;') + self.assertEqual(unescapeHTML('/'), '/') + self.assertEqual(unescapeHTML('/'), '/') + self.assertEqual(unescapeHTML('é'), 'é') + self.assertEqual(unescapeHTML('�'), '�') + self.assertEqual(unescapeHTML('&a"'), '&a"') + # HTML5 entities + self.assertEqual(unescapeHTML('.''), '.\'') + + def test_date_from_str(self): + self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) + self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week')) + self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week')) + self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year')) + self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month')) + + def test_datetime_from_str(self): + self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto')) + self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto')) + self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto')) + self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto')) + self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto')) + self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto')) + self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto')) + self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto')) + + def test_daterange(self): + _20century = DateRange("19000101", "20000101") + self.assertFalse("17890714" in _20century) + _ac = DateRange("00010101") + self.assertTrue("19690721" in _ac) + _firstmilenium = DateRange(end="10000101") + self.assertTrue("07110427" in _firstmilenium) + + def test_unified_dates(self): + self.assertEqual(unified_strdate('December 21, 2010'), '20101221') + self.assertEqual(unified_strdate('8/7/2009'), '20090708') + self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') + self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') + self.assertEqual(unified_strdate('1968 12 10'), '19681210') + self.assertEqual(unified_strdate('1968-12-10'), '19681210') + self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731') + self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') + self.assertEqual( + unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), + '20141126') + self.assertEqual( + unified_strdate('2/2/2015 6:47:40 PM', day_first=False), + '20150202') + self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214') + self.assertEqual(unified_strdate('25-09-2014'), '20140925') + self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') + self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) + self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') + self.assertEqual(unified_strdate('July 15th, 2013'), '20130715') + 
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901') + self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902') + self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103') + self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023') + + def test_unified_timestamps(self): + self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) + self.assertEqual(unified_timestamp('8/7/2009'), 1247011200) + self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200) + self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598) + self.assertEqual(unified_timestamp('1968 12 10'), -33436800) + self.assertEqual(unified_timestamp('1968-12-10'), -33436800) + self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200) + self.assertEqual( + unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False), + 1417001400) + self.assertEqual( + unified_timestamp('2/2/2015 6:47:40 PM', day_first=False), + 1422902860) + self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900) + self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) + self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) + self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) + self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) + self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) + self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361) + self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) + self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) + self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) + + self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1) + self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86) + self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78) + + def test_determine_ext(self): + self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') + self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) + self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None) + self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None) + self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8') + self.assertEqual(determine_ext('foobar', None), None) + + def test_find_xpath_attr(self): + testxml = '''<root> + <node/> + <node x="a"/> + <node x="a" y="c" /> + <node x="b" y="d" /> + <node x="" /> + </root>''' + doc = compat_etree_fromstring(testxml) + + self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None) + self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1]) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3]) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4]) + + def test_xpath_with_ns(self): + testxml = '''<root xmlns:media="http://example.com/"> + 
<media:song> + <media:author>The Author</media:author> + <url>http://server.com/download.mp3</url> + </media:song> + </root>''' + doc = compat_etree_fromstring(testxml) + find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) + self.assertTrue(find('media:song') is not None) + self.assertEqual(find('media:song/media:author').text, 'The Author') + self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') + + def test_xpath_element(self): + doc = xml.etree.ElementTree.Element('root') + div = xml.etree.ElementTree.SubElement(doc, 'div') + p = xml.etree.ElementTree.SubElement(div, 'p') + p.text = 'Foo' + self.assertEqual(xpath_element(doc, 'div/p'), p) + self.assertEqual(xpath_element(doc, ['div/p']), p) + self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p) + self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default') + self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default') + self.assertTrue(xpath_element(doc, 'div/bar') is None) + self.assertTrue(xpath_element(doc, ['div/bar']) is None) + self.assertTrue(xpath_element(doc, ['div/bar', 'div/baz']) is None) + self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True) + self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True) + self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True) + + def test_xpath_text(self): + testxml = '''<root> + <div> + <p>Foo</p> + </div> + </root>''' + doc = compat_etree_fromstring(testxml) + self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') + self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default') + self.assertTrue(xpath_text(doc, 'div/bar') is None) + self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True) + + def test_xpath_attr(self): + testxml = '''<root> + <div> + <p x="a">Foo</p> + </div> + </root>''' + doc = compat_etree_fromstring(testxml) + self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a') + self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None) + self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None) + self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default') + self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default') + self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True) + self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True) + + def test_smuggle_url(self): + data = {"ö": "ö", "abc": [3]} + url = 'https://foo.bar/baz?x=y#a' + smug_url = smuggle_url(url, data) + unsmug_url, unsmug_data = unsmuggle_url(smug_url) + self.assertEqual(url, unsmug_url) + self.assertEqual(data, unsmug_data) + + res_url, res_data = unsmuggle_url(url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, None) + + smug_url = smuggle_url(url, {'a': 'b'}) + smug_smug_url = smuggle_url(smug_url, {'c': 'd'}) + res_url, res_data = unsmuggle_url(smug_smug_url) + self.assertEqual(res_url, url) + self.assertEqual(res_data, {'a': 'b', 'c': 'd'}) + + def test_shell_quote(self): + args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] + self.assertEqual( + shell_quote(args), + """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') + + def test_float_or_none(self): + self.assertEqual(float_or_none('42.42'), 42.42) + self.assertEqual(float_or_none('42'), 42.0) + self.assertEqual(float_or_none(''), None) + self.assertEqual(float_or_none(None), None) + self.assertEqual(float_or_none([]), None) +
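# as with '' and None above, arbitrary non-numeric objects should coerce to None rather than raise +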
self.assertEqual(float_or_none(set()), None) + + def test_int_or_none(self): + self.assertEqual(int_or_none('42'), 42) + self.assertEqual(int_or_none(''), None) + self.assertEqual(int_or_none(None), None) + self.assertEqual(int_or_none([]), None) + self.assertEqual(int_or_none(set()), None) + + def test_str_to_int(self): + self.assertEqual(str_to_int('123,456'), 123456) + self.assertEqual(str_to_int('123.456'), 123456) + self.assertEqual(str_to_int(523), 523) + self.assertEqual(str_to_int('noninteger'), None) + self.assertEqual(str_to_int([]), None) + + def test_url_basename(self): + self.assertEqual(url_basename('http://foo.de/'), '') + self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz') + self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz') + self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz') + self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz') + self.assertEqual( + url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), + 'trailer.mp4') + + def test_base_url(self): + self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/') + self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/') + self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/') + self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/') + + def test_urljoin(self): + self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt') + self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt') + self.assertEqual(urljoin('http://foo.de/', None), None) + self.assertEqual(urljoin('http://foo.de/', ''), None) + self.assertEqual(urljoin('http://foo.de/', ['foobar']), None) + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt') + self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de') + self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de') + + def test_url_or_none(self): + self.assertEqual(url_or_none(None), None) + self.assertEqual(url_or_none(''), None) + self.assertEqual(url_or_none('foo'), None) + self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') + self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de') + self.assertEqual(url_or_none('http$://foo.de'), None) + 
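# url_or_none() only passes through http(s), protocol-relative and known media schemes (rtmp*, mms, rtsp*, ftps); anything else, e.g. s3://, becomes None +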
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de') + self.assertEqual(url_or_none('//foo.de'), '//foo.de') + self.assertEqual(url_or_none('s3://foo.de'), None) + self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de') + self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de') + self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de') + self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de') + + def test_parse_age_limit(self): + self.assertEqual(parse_age_limit(None), None) + self.assertEqual(parse_age_limit(False), None) + self.assertEqual(parse_age_limit('invalid'), None) + self.assertEqual(parse_age_limit(0), 0) + self.assertEqual(parse_age_limit(18), 18) + self.assertEqual(parse_age_limit(21), 21) + self.assertEqual(parse_age_limit(22), None) + self.assertEqual(parse_age_limit('18'), 18) + self.assertEqual(parse_age_limit('18+'), 18) + self.assertEqual(parse_age_limit('PG-13'), 13) + self.assertEqual(parse_age_limit('TV-14'), 14) + self.assertEqual(parse_age_limit('TV-MA'), 17) + self.assertEqual(parse_age_limit('TV14'), 14) + self.assertEqual(parse_age_limit('TV_G'), 0) + + def test_parse_duration(self): + self.assertEqual(parse_duration(None), None) + self.assertEqual(parse_duration(False), None) + self.assertEqual(parse_duration('invalid'), None) + self.assertEqual(parse_duration('1'), 1) + self.assertEqual(parse_duration('1337:12'), 80232) + self.assertEqual(parse_duration('9:12:43'), 33163) + self.assertEqual(parse_duration('12:00'), 720) + self.assertEqual(parse_duration('00:01:01'), 61) + self.assertEqual(parse_duration('x:y'), None) + self.assertEqual(parse_duration('3h11m53s'), 11513) + self.assertEqual(parse_duration('3h 11m 53s'), 11513) + self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) + self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) + self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513) + self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513) + self.assertEqual(parse_duration('62m45s'), 3765) + self.assertEqual(parse_duration('6m59s'), 419) + self.assertEqual(parse_duration('49s'), 49) + self.assertEqual(parse_duration('0h0m0s'), 0) + self.assertEqual(parse_duration('0m0s'), 0) + self.assertEqual(parse_duration('0s'), 0) + self.assertEqual(parse_duration('01:02:03.05'), 3723.05) + self.assertEqual(parse_duration('T30M38S'), 1838) + self.assertEqual(parse_duration('5 s'), 5) + self.assertEqual(parse_duration('3 min'), 180) + self.assertEqual(parse_duration('2.5 hours'), 9000) + self.assertEqual(parse_duration('02:03:04'), 7384) + self.assertEqual(parse_duration('01:02:03:04'), 93784) + self.assertEqual(parse_duration('1 hour 3 minutes'), 3780) + self.assertEqual(parse_duration('87 Min.'), 5220) + self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) + self.assertEqual(parse_duration('PT00H03M30SZ'), 210) + self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) + self.assertEqual(parse_duration('01:02:03:050'), 3723.05) + self.assertEqual(parse_duration('103:050'), 103.05) + self.assertEqual(parse_duration('1HR 3MIN'), 3780) + self.assertEqual(parse_duration('2hrs 3mins'), 7380) + + def test_fix_xml_ampersands(self): + self.assertEqual( + fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a') + self.assertEqual( + fix_xml_ampersands('"&amp;x=y&amp;wrong;&amp;z=a'), + '"&amp;x=y&amp;wrong;&amp;z=a') + self.assertEqual( + fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'), + '&amp;&apos;&gt;&lt;&quot;') + self.assertEqual( + fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;') + self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#') + +
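# OnDemandPagedList fetches pages lazily as getslice() consumes them, whereas InAdvancePagedList is constructed with the page count known up front; both must yield identical slices +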
def test_paged_list(self): + def testPL(size, pagesize, sliceargs, expected): + def get_page(pagenum): + firstid = pagenum * pagesize + upto = min(size, pagenum * pagesize + pagesize) + yield from range(firstid, upto) + + pl = OnDemandPagedList(get_page, pagesize) + got = pl.getslice(*sliceargs) + self.assertEqual(got, expected) + + iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize) + got = iapl.getslice(*sliceargs) + self.assertEqual(got, expected) + + testPL(5, 2, (), [0, 1, 2, 3, 4]) + testPL(5, 2, (1,), [1, 2, 3, 4]) + testPL(5, 2, (2,), [2, 3, 4]) + testPL(5, 2, (4,), [4]) + testPL(5, 2, (0, 3), [0, 1, 2]) + testPL(5, 2, (1, 4), [1, 2, 3]) + testPL(5, 2, (2, 99), [2, 3, 4]) + testPL(5, 2, (20, 99), []) + + def test_read_batch_urls(self): + f = io.StringIO('''\xef\xbb\xbf foo + bar\r + baz + # More after this line\r + ; or after this + bam''') + self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam']) + + def test_urlencode_postdata(self): + data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) + self.assertTrue(isinstance(data, bytes)) + + def test_update_url_query(self): + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), + parse_qs('http://example.com/path?quality=HD&format=mp4')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), + parse_qs('http://example.com/path?system=LINUX&system=WINDOWS')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'fields': 'id,formats,subtitles'})), + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), + parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path?manifest=f4m', {'manifest': []})), + parse_qs('http://example.com/path')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), + parse_qs('http://example.com/path?system=LINUX')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'fields': b'id,formats,subtitles'})), + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'width': 1080, 'height': 720})), + parse_qs('http://example.com/path?width=1080&height=720')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'bitrate': 5020.43})), + parse_qs('http://example.com/path?bitrate=5020.43')) + self.assertEqual(parse_qs(update_url_query( + 'http://example.com/path', {'test': '第二行тест'})), + parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + + def test_multipart_encode(self): + self.assertEqual( + multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], + b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') + self.assertEqual( + multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0], + b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') + self.assertRaises( + ValueError, multipart_encode, {b'field': b'value'}, boundary='value') + + def test_dict_get(self): + FALSE_VALUES = { + 'none': None, + 'false': False, + 'zero': 0, + 'empty_string': '', 
+ 'empty_list': [], + } + d = FALSE_VALUES.copy() + d['a'] = 42 + self.assertEqual(dict_get(d, 'a'), 42) + self.assertEqual(dict_get(d, 'b'), None) + self.assertEqual(dict_get(d, 'b', 42), 42) + self.assertEqual(dict_get(d, ('a', )), 42) + self.assertEqual(dict_get(d, ('b', 'a', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42) + self.assertEqual(dict_get(d, ('b', 'c', )), None) + self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42) + for key, false_value in FALSE_VALUES.items(): + self.assertEqual(dict_get(d, ('b', 'c', key, )), None) + self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value) + + def test_merge_dicts(self): + self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) + self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1}) + self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1}) + self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''}) + self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'}) + + def test_encode_compat_str(self): + self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') + self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') + + def test_parse_iso8601(self): + self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) + self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251) + self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None) + + def test_strip_jsonp(self): + stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') + d = json.loads(stripped) + self.assertEqual(d, [{"id": "532cb", "x": 3}]) + + stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc') + d = json.loads(stripped) + self.assertEqual(d, {'STATUS': 'OK'}) + + stripped = strip_jsonp('ps.embedHandler({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + stripped = strip_jsonp('window.cb && window.cb({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + stripped = strip_jsonp('window.cb && cb({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + stripped = strip_jsonp('({"status": "success"});') + d = json.loads(stripped) + self.assertEqual(d, {'status': 'success'}) + + def test_strip_or_none(self): + self.assertEqual(strip_or_none(' abc'), 'abc') + self.assertEqual(strip_or_none('abc '), 'abc') + self.assertEqual(strip_or_none(' abc '), 'abc') + self.assertEqual(strip_or_none('\tabc\t'), 'abc') + self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc') + self.assertEqual(strip_or_none('abc'), 'abc') + self.assertEqual(strip_or_none(''), '') + self.assertEqual(strip_or_none(None), None) + self.assertEqual(strip_or_none(42), None) + self.assertEqual(strip_or_none([]), None) + + def test_uppercase_escape(self): + self.assertEqual(uppercase_escape('aä'), 'aä') + self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') + + def test_lowercase_escape(self): + self.assertEqual(lowercase_escape('aä'), 
'aä') + self.assertEqual(lowercase_escape('\\u0026'), '&') + + def test_limit_length(self): + self.assertEqual(limit_length(None, 12), None) + self.assertEqual(limit_length('foo', 12), 'foo') + self.assertTrue( + limit_length('foo bar baz asd', 12).startswith('foo bar')) + self.assertTrue('...' in limit_length('foo bar baz asd', 12)) + + def test_mimetype2ext(self): + self.assertEqual(mimetype2ext(None), None) + self.assertEqual(mimetype2ext('video/x-flv'), 'flv') + self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') + self.assertEqual(mimetype2ext('text/vtt'), 'vtt') + self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') + self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + self.assertEqual(mimetype2ext('audio/x-wav'), 'wav') + self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav') + + def test_month_by_name(self): + self.assertEqual(month_by_name(None), None) + self.assertEqual(month_by_name('December', 'en'), 12) + self.assertEqual(month_by_name('décembre', 'fr'), 12) + self.assertEqual(month_by_name('December'), 12) + self.assertEqual(month_by_name('décembre'), None) + self.assertEqual(month_by_name('Unknown', 'unknown'), None) + + def test_parse_codecs(self): + self.assertEqual(parse_codecs(''), {}) + self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { + 'vcodec': 'avc1.77.30', + 'acodec': 'mp4a.40.2', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('mp4a.40.2'), { + 'vcodec': 'none', + 'acodec': 'mp4a.40.2', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { + 'vcodec': 'avc1.42001e', + 'acodec': 'mp4a.40.5', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('avc3.640028'), { + 'vcodec': 'avc3.640028', + 'acodec': 'none', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { + 'vcodec': 'h264', + 'acodec': 'aac', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('av01.0.05M.08'), { + 'vcodec': 'av01.0.05M.08', + 'acodec': 'none', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('vp9.2'), { + 'vcodec': 'vp9.2', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { + 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('dvhe'), { + 'vcodec': 'dvhe', + 'acodec': 'none', + 'dynamic_range': 'DV', + }) + self.assertEqual(parse_codecs('theora, vorbis'), { + 'vcodec': 'theora', + 'acodec': 'vorbis', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { + 'vcodec': 'unknownvcodec', + 'acodec': 'unknownacodec', + }) + self.assertEqual(parse_codecs('unknown'), {}) + + def test_escape_rfc3986(self): + reserved = "!*'();:@&=+$,/?#[]" + unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' + self.assertEqual(escape_rfc3986(reserved), reserved) + self.assertEqual(escape_rfc3986(unreserved), unreserved) + self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82') + self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82') + self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') + self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') + + def test_normalize_url(self): + self.assertEqual( + normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), + 
'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' + ) + self.assertEqual( + normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), + 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' + ) + self.assertEqual( + normalize_url('http://тест.рф/фрагмент'), + 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + ) + self.assertEqual( + normalize_url('http://тест.рф/абв?абв=абв#абв'), + 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + ) + self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + + self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html') + + def test_remove_dot_segments(self): + self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g') + self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6') + self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd') + self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/') + self.assertEqual(remove_dot_segments('/..'), '/') + self.assertEqual(remove_dot_segments('/./'), '/') + self.assertEqual(remove_dot_segments('/./a'), '/a') + self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi') + self.assertEqual(remove_dot_segments('/'), '/') + self.assertEqual(remove_dot_segments('/t'), '/t') + self.assertEqual(remove_dot_segments('t'), 't') + self.assertEqual(remove_dot_segments(''), '') + self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c') + self.assertEqual(remove_dot_segments('../a'), 'a') + self.assertEqual(remove_dot_segments('./a'), 'a') + self.assertEqual(remove_dot_segments('.'), '') + self.assertEqual(remove_dot_segments('////'), '////') + + def test_js_to_json_vars_strings(self): + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'null': a, + 'nullStr': b, + 'true': c, + 'trueStr': d, + 'false': e, + 'falseStr': f, + 'unresolvedVar': g, + }''', + { + 'a': 'null', + 'b': '"null"', + 'c': 'true', + 'd': '"true"', + 'e': 'false', + 'f': '"false"', + 'g': 'var', + } + )), + { + 'null': None, + 'nullStr': 'null', + 'true': True, + 'trueStr': 'true', + 'false': False, + 'falseStr': 'false', + 'unresolvedVar': 'var' + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'int': a, + 'intStr': b, + 'float': c, + 'floatStr': d, + }''', + { + 'a': '123', + 'b': '"123"', + 'c': '1.23', + 'd': '"1.23"', + } + )), + { + 'int': 123, + 'intStr': '123', + 'float': 1.23, + 'floatStr': '1.23', + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'object': a, + 'objectStr': b, + 'array': c, + 'arrayStr': d, + }''', + { + 'a': '{}', + 'b': '"{}"', + 'c': '[]', + 'd': '"[]"', + } + )), + { + 'object': {}, + 'objectStr': '{}', + 'array': [], + 'arrayStr': '[]', + } + ) + + def test_js_to_json_realworld(self): + inp = '''{ + 'clip':{'provider':'pseudo'} + }''' + self.assertEqual(js_to_json(inp), '''{ + "clip":{"provider":"pseudo"} + }''') + json.loads(js_to_json(inp)) + + inp = '''{ + 'playlist':[{'controls':{'all':null}}] + }''' + self.assertEqual(js_to_json(inp), '''{ + "playlist":[{"controls":{"all":null}}] + }''') + + inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"''' + self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''') + + inp = '"SAND 
Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' + json_code = js_to_json(inp) + self.assertEqual(json.loads(json_code), json.loads(inp)) + + inp = '''{ + 0:{src:'skipped', type: 'application/dash+xml'}, + 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, + }''' + self.assertEqual(js_to_json(inp), '''{ + "0":{"src":"skipped", "type": "application/dash+xml"}, + "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} + }''') + + inp = '''{"foo":101}''' + self.assertEqual(js_to_json(inp), '''{"foo":101}''') + + inp = '''{"duration": "00:01:07"}''' + self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') + + inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''' + self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''') + + def test_js_to_json_edgecases(self): + on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") + self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) + + on = js_to_json('{"abc": true}') + self.assertEqual(json.loads(on), {'abc': True}) + + # Ignore JavaScript code as well + on = js_to_json('''{ + "x": 1, + y: "a", + z: some.code + }''') + d = json.loads(on) + self.assertEqual(d['x'], 1) + self.assertEqual(d['y'], 'a') + + # Just drop ! prefix for now though this results in a wrong value + on = js_to_json('''{ + a: !0, + b: !1, + c: !!0, + d: !!42.42, + e: !!![], + f: !"abc", + g: !"", + !42: 42 + }''') + self.assertEqual(json.loads(on), { + 'a': 0, + 'b': 1, + 'c': 0, + 'd': 42.42, + 'e': [], + 'f': "abc", + 'g': "", + '42': 42 + }) + + on = js_to_json('["abc", "def",]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('{"abc": "def",}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + + on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + + on = js_to_json('{ 0: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json('{ 0: // comment\n1 }') + self.assertEqual(json.loads(on), {'0': 1}) + + on = js_to_json(r'["<p>x<\/p>"]') + self.assertEqual(json.loads(on), ['<p>x</p>']) + + on = js_to_json(r'["\xaa"]') + self.assertEqual(json.loads(on), ['\u00aa']) + + on = js_to_json("['a\\\nb']") + self.assertEqual(json.loads(on), ['ab']) + + on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/") + self.assertEqual(json.loads(on), ['ab']) + + on = js_to_json('{0xff:0xff}') + self.assertEqual(json.loads(on), {'255': 255}) + + on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}') + self.assertEqual(json.loads(on), {'255': 255}) + + on = js_to_json('{077:077}') + self.assertEqual(json.loads(on), {'63': 63}) + + on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}') + self.assertEqual(json.loads(on), {'63': 63}) + + on = js_to_json('{42:42}') + self.assertEqual(json.loads(on), {'42': 42}) + + on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}') + 
self.assertEqual(json.loads(on), {'42': 42}) + + on = js_to_json('{42:4.2e1}') + self.assertEqual(json.loads(on), {'42': 42.0}) + + on = js_to_json('{ "0x40": "0x40" }') + self.assertEqual(json.loads(on), {'0x40': '0x40'}) + + on = js_to_json('{ "040": "040" }') + self.assertEqual(json.loads(on), {'040': '040'}) + + on = js_to_json('[1,//{},\n2]') + self.assertEqual(json.loads(on), [1, 2]) + + on = js_to_json(R'"\^\$\#"') + self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped') + + on = js_to_json('\'"\\""\'') + self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped') + + on = js_to_json('[new Date("spam"), \'("eggs")\']') + self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string') + + def test_js_to_json_malformed(self): + self.assertEqual(js_to_json('42a1'), '42"a1"') + self.assertEqual(js_to_json('42a-1'), '42"a"-1') + + def test_js_to_json_template_literal(self): + self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"') + self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"') + self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"') + self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') + self.assertEqual(js_to_json('`${name}`', {}), '"name"') + + def test_js_to_json_common_constructors(self): + self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5}) + self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) + self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) + self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) + self.assertEqual(json.loads(js_to_json('new Date("123")')), "123") + self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19") + + def test_extract_attributes(self): + self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) + self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) + self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'}) + self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"}) + self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'}) + self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'}) + self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'}) + self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'}) # XML + self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'}) + self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'}) # HTML 3.2 + self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'}) # HTML 4.0 + self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'}) + self.assertEqual(extract_attributes('<e x="&#39;">'), {'x': "'"}) + self.assertEqual(extract_attributes('<e x=\'&quot;\'>'), {'x': '"'}) + self.assertEqual(extract_attributes('<e x >'), {'x': None}) + self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None}) + self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'}) + self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'}) + self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'}) + self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'}) + self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'}) + self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'}) + self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'}) # Names lowercased +
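# attribute names are case-insensitive and lowercased, so with duplicate names the last value wins +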
self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'}) + self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'}) + self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'}) + self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'}) + self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'}) + # "Narrow" Python builds don't support unicode code points outside BMP. + try: + chr(0x10000) + supports_outside_bmp = True + except ValueError: + supports_outside_bmp = False + if supports_outside_bmp: + self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'}) + # Malformed HTML should not break attributes extraction on older Python + self.assertEqual(extract_attributes('<mal"formed/>'), {}) + + def test_clean_html(self): + self.assertEqual(clean_html('a:\nb'), 'a: b') + self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') + self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') + + def test_intlist_to_bytes(self): + self.assertEqual( + intlist_to_bytes([0, 1, 127, 128, 255]), + b'\x00\x01\x7f\x80\xff') + + def test_args_to_str(self): + self.assertEqual( + args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), + 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""' + ) + + def test_parse_filesize(self): + self.assertEqual(parse_filesize(None), None) + self.assertEqual(parse_filesize(''), None) + self.assertEqual(parse_filesize('91 B'), 91) + self.assertEqual(parse_filesize('foobar'), None) + self.assertEqual(parse_filesize('2 MiB'), 2097152) + self.assertEqual(parse_filesize('5 GB'), 5000000000) + self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) + self.assertEqual(parse_filesize('1.2tb'), 1200000000000) + self.assertEqual(parse_filesize('1,24 KB'), 1240) + self.assertEqual(parse_filesize('1,24 kb'), 1240) + self.assertEqual(parse_filesize('8.5 megabytes'), 8500000) + + def test_parse_count(self): + self.assertEqual(parse_count(None), None) + self.assertEqual(parse_count(''), None) + self.assertEqual(parse_count('0'), 0) + self.assertEqual(parse_count('1000'), 1000) + self.assertEqual(parse_count('1.000'), 1000) + self.assertEqual(parse_count('1.1k'), 1100) + self.assertEqual(parse_count('1.1 k'), 1100) + self.assertEqual(parse_count('1,1 k'), 1100) + self.assertEqual(parse_count('1.1kk'), 1100000) + self.assertEqual(parse_count('1.1kk '), 1100000) + self.assertEqual(parse_count('1,1kk'), 1100000) + self.assertEqual(parse_count('100 views'), 100) + self.assertEqual(parse_count('1,100 views'), 1100) + self.assertEqual(parse_count('1.1kk views'), 1100000) + self.assertEqual(parse_count('10M views'), 10000000) + self.assertEqual(parse_count('has 10M views'), 10000000) + + def test_parse_resolution(self): + self.assertEqual(parse_resolution(None), {}) + self.assertEqual(parse_resolution(''), {}) + self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('720p'), {'height': 720}) + self.assertEqual(parse_resolution('4k'), {'height': 2160}) + self.assertEqual(parse_resolution('8K'), {'height': 4320}) + self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('ep1x2'), {}) + self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height':
1080}) + + def test_parse_bitrate(self): + self.assertEqual(parse_bitrate(None), None) + self.assertEqual(parse_bitrate(''), None) + self.assertEqual(parse_bitrate('300kbps'), 300) + self.assertEqual(parse_bitrate('1500kbps'), 1500) + self.assertEqual(parse_bitrate('300 kbps'), 300) + + def test_version_tuple(self): + self.assertEqual(version_tuple('1'), (1,)) + self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) + self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style + + def test_detect_exe_version(self): + self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1 +built on May 27 2013 08:37:26 with gcc 4.7 (Debian 4.7.3-4) +configuration: --prefix=/usr --extra-'''), '1.2.1') + self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685 +built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685') + self.assertEqual(detect_exe_version('''X server found. dri2 connection failed! +Trying to open render node... +Success at /dev/dri/renderD128. +ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') + + def test_age_restricted(self): + self.assertFalse(age_restricted(None, 10)) # unrestricted content + self.assertFalse(age_restricted(1, None)) # unrestricted policy + self.assertFalse(age_restricted(8, 10)) + self.assertTrue(age_restricted(18, 14)) + self.assertFalse(age_restricted(18, 18)) + + def test_is_html(self): + self.assertFalse(is_html(b'\x49\x44\x43<html')) + self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa')) + self.assertTrue(is_html( # UTF-8 with BOM + b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa')) + self.assertTrue(is_html( # UTF-16-LE + b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00' + )) + self.assertTrue(is_html( # UTF-16-BE + b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4' + )) + self.assertTrue(is_html( # UTF-32-BE + b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) + self.assertTrue(is_html( # UTF-32-LE + b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) + + def test_render_table(self): + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]]), + 'a    empty bcd\n' + '123        4\n' + '9999       51') + + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]], + hide_empty=True), + 'a    bcd\n' + '123  4\n' + '9999 51') + + self.assertEqual( + render_table( + ['\ta', 'bcd'], + [['1\t23', 4], ['\t9999', 51]]), + '   a bcd\n' + '1 23 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-'), + 'a    bcd\n' + '--------\n' + '123  4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-', extra_gap=2), + 'a      bcd\n' + '----------\n' + '123    4\n' + '9999   51') + + def test_match_str(self): + # Unary + self.assertFalse(match_str('xy', {'x': 1200})) + self.assertTrue(match_str('!xy', {'x': 1200})) + self.assertTrue(match_str('x', {'x': 1200})) + self.assertFalse(match_str('!x', {'x': 1200})) + self.assertTrue(match_str('x', {'x': 0})) + self.assertTrue(match_str('is_live', {'is_live': True})) + self.assertFalse(match_str('is_live', {'is_live': False})) + self.assertFalse(match_str('is_live', {'is_live': None})) + self.assertFalse(match_str('is_live', {})) + self.assertFalse(match_str('!is_live', {'is_live': True})) + self.assertTrue(match_str('!is_live', {'is_live': False})) + self.assertTrue(match_str('!is_live', {'is_live': None})) +
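# a missing field and an explicit None behave alike: falsy for a plain match, truthy for a negated one +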
self.assertTrue(match_str('!is_live', {})) + self.assertTrue(match_str('title', {'title': 'abc'})) + self.assertTrue(match_str('title', {'title': ''})) + self.assertFalse(match_str('!title', {'title': 'abc'})) + self.assertFalse(match_str('!title', {'title': ''})) + + # Numeric + self.assertFalse(match_str('x>0', {'x': 0})) + self.assertFalse(match_str('x>0', {})) + self.assertTrue(match_str('x>?0', {})) + self.assertTrue(match_str('x>1K', {'x': 1200})) + self.assertFalse(match_str('x>2K', {'x': 1200})) + self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) + self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) + self.assertTrue(match_str('x > 1:0:0', {'x': 3700})) + + # String + self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) + self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) + self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) + self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) + self.assertTrue(match_str('y^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y^=bar', {'y': 'foobar42'})) + self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'})) + self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42}) + self.assertTrue(match_str('y*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y$=42', {'y': 'foobar42'})) + self.assertFalse(match_str('y$=43', {'y': 'foobar42'})) + + # And + self.assertFalse(match_str( + 'like_count > 100 & dislike_count <? 50 & description', + {'like_count': 90, 'description': 'foo'})) + self.assertTrue(match_str( + 'like_count > 100 & dislike_count <? 50 & description', + {'like_count': 190, 'description': 'foo'})) + self.assertFalse(match_str( + 'like_count > 100 & dislike_count <? 50 & description', + {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) + self.assertFalse(match_str( + 'like_count > 100 & dislike_count <? 
50 & description', + {'like_count': 190, 'dislike_count': 10})) + + # Regex + self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'})) + self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'})) + self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'})) + self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'})) + + # Quotes + self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'})) + self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'})) + self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'})) + self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'})) + + # Escaping & + self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'})) + self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'})) + self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'})) + self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'})) + + # Example from docs + self.assertTrue(match_str( + r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'", + {'description': 'Raining Cats & Dogs'})) + + # Incomplete + self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True)) + self.assertTrue(match_str('x', {'id': 'foo'}, True)) + self.assertTrue(match_str('!x', {'id': 'foo'}, True)) + self.assertFalse(match_str('x', {'id': 'foo'}, False)) + + def test_parse_dfxp_time_expr(self): + self.assertEqual(parse_dfxp_time_expr(None), None) + self.assertEqual(parse_dfxp_time_expr(''), None) + self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) + self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) + self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) + self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1) + self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1) + + def test_dfxp2srt(self): + dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?> + <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> + <body> + <div xml:lang="en"> + <p begin="0" end="1">The following line contains Chinese characters and special symbols</p> + <p begin="1" end="2">第二行<br/>♪♪</p> + <p begin="2" dur="1"><span>Third<br/>Line</span></p> + <p begin="3" end="-1">Lines with invalid timestamps are ignored</p> + <p begin="-1" end="-1">Ignore, two</p> + <p begin="3" dur="-1">Ignored, three</p> + </div> + </body> + </tt>'''.encode() + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The following line contains Chinese characters and special symbols + +2 +00:00:01,000 --> 00:00:02,000 +第二行 +♪♪ + +3 +00:00:02,000 --> 00:00:03,000 +Third +Line + +''' + self.assertEqual(dfxp2srt(dfxp_data), srt_data) + + dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?> + <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> + <body> + <div xml:lang="en"> + <p begin="0" end="1">The first line</p> + </div> + </body> + </tt>''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The first line + +''' + self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) + + dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?> +<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata"> + <head> + <styling> + <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" /> + <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" /> + <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" /> + 
<style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" /> + </styling> + </head> + <body tts:textAlign="center" style="s0"> + <div> + <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p> + <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p> + <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p> + <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p> + </div> + </body> +</tt>''' + srt_data = '''1 +00:00:02,080 --> 00:00:05,840 +<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> + +2 +00:00:02,080 --> 00:00:05,840 +<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 +</font>part 2</font></b> + +3 +00:00:05,840 --> 00:00:09,560 +<u><font color="lime">line 3 +part 3</font></u> + +4 +00:00:09,560 --> 00:00:12,360 +<i><u><font color="yellow"><font color="lime">inner + </font>style</font></u></i> + +''' + self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data) + + dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?> + <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> + <body> + <div xml:lang="en"> + <p begin="0" end="1">Line 1</p> + <p begin="1" end="2">第二行</p> + </div> + </body> + </tt>'''.encode('utf-16') + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +Line 1 + +2 +00:00:01,000 --> 00:00:02,000 +第二行 + +''' + self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data) + + def test_cli_option(self): + self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) + self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) + self.assertEqual(cli_option({}, '--proxy', 'proxy'), []) + self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10']) + + def test_cli_valueless_option(self): + self.assertEqual(cli_valueless_option( + {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader']) + self.assertEqual(cli_valueless_option( + {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), []) + self.assertEqual(cli_valueless_option( + {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate']) + self.assertEqual(cli_valueless_option( + {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), []) + self.assertEqual(cli_valueless_option( + {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), []) + self.assertEqual(cli_valueless_option( + {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate']) + + def test_cli_bool_option(self): + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), + ['--no-check-certificate', 'true']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='), + ['--no-check-certificate=true']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), + ['--check-certificate', 'false']) + 
self.assertEqual( + cli_bool_option( + {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), + ['--check-certificate=false']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), + ['--check-certificate', 'true']) + self.assertEqual( + cli_bool_option( + {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), + ['--check-certificate=true']) + self.assertEqual( + cli_bool_option( + {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), + []) + + def test_ohdave_rsa_encrypt(self): + N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd + e = 65537 + + self.assertEqual( + ohdave_rsa_encrypt(b'aa111222', e, N), + '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881') + + def test_pkcs1pad(self): + data = [1, 2, 3] + padded_data = pkcs1pad(data, 32) + self.assertEqual(padded_data[:2], [0, 2]) + self.assertEqual(padded_data[28:], [0, 1, 2, 3]) + + self.assertRaises(ValueError, pkcs1pad, data, 8) + + def test_encode_base_n(self): + self.assertEqual(encode_base_n(0, 30), '0') + self.assertEqual(encode_base_n(80, 30), '2k') + + custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA' + self.assertEqual(encode_base_n(0, 30, custom_table), '9') + self.assertEqual(encode_base_n(80, 30, custom_table), '7P') + + self.assertRaises(ValueError, encode_base_n, 0, 70) + self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) + + def test_caesar(self): + self.assertEqual(caesar('ace', 'abcdef', 2), 'cea') + self.assertEqual(caesar('cea', 'abcdef', -2), 'ace') + self.assertEqual(caesar('ace', 'abcdef', -2), 'eac') + self.assertEqual(caesar('eac', 'abcdef', 2), 'ace') + self.assertEqual(caesar('ace', 'abcdef', 0), 'ace') + self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz') + self.assertEqual(caesar('abc', 'acegik', 2), 'ebg') + self.assertEqual(caesar('ebg', 'acegik', -2), 'abc') + + def test_rot47(self): + self.assertEqual(rot47('yt-dlp'), r'JE\5=A') + self.assertEqual(rot47('YT-DLP'), r'*%\s{!') + + def test_urshift(self): + self.assertEqual(urshift(3, 1), 1) + self.assertEqual(urshift(-3, 1), 2147483646) + + GET_ELEMENT_BY_CLASS_TEST_STRING = ''' + <span class="foo bar">nice</span> + ''' + + def test_get_element_by_class(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_by_class('foo', html), 'nice') + self.assertEqual(get_element_by_class('no-such-class', html), None) + + def test_get_element_html_by_class(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_class('foo', html), html.strip()) + self.assertEqual(get_element_by_class('no-such-class', html), None) + + GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = ''' + <div itemprop="author" itemscope>foo</div> + ''' + + def test_get_element_by_attribute(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice') + self.assertEqual(get_element_by_attribute('class', 'foo', html), None) + self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None) + + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING + + self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') + + def test_get_element_html_by_attribute(self): + html = 
self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip()) + self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None) + self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None) + + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip()) + + GET_ELEMENTS_BY_CLASS_TEST_STRING = ''' + <span class="foo bar">nice</span><span class="foo bar">also nice</span> + ''' + GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>'] + + def test_get_elements_by_class(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice']) + self.assertEqual(get_elements_by_class('no-such-class', html), []) + + def test_get_elements_html_by_class(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_class('no-such-class', html), []) + + def test_get_elements_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice']) + self.assertEqual(get_elements_by_attribute('class', 'foo', html), []) + self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), []) + + def test_get_elements_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), []) + self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), []) + + def test_get_elements_text_and_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual( + list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)), + list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES))) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), []) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), []) + + self.assertEqual(list(get_elements_text_and_html_by_attribute( + 'class', 'foo', '<a class="foo">nice</a><span class="foo">nice</span>', tag='a')), [('nice', '<a class="foo">nice</a>')]) + + GET_ELEMENT_BY_TAG_TEST_STRING = ''' + random text lorem ipsum</p> + <div> + this should be returned + <span>this should also be returned</span> + <div> + this should also be returned + </div> + closing tag above should not trick, so this should also be returned + </div> + but this text should not be returned + ''' + GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276] + GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7] + + def test_get_element_text_and_html_by_tag(self): + html = self.GET_ELEMENT_BY_TAG_TEST_STRING + + self.assertEqual( + get_element_text_and_html_by_tag('div', html), + (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML)) + self.assertEqual( + get_element_text_and_html_by_tag('span', html), + 
(self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML)) + self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html) + + def test_iri_to_uri(self): + self.assertEqual( + iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'), + 'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b') # Same + self.assertEqual( + iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'), # German for cheese sauce stirring spoon + 'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel') + self.assertEqual( + iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'), + 'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#') + self.assertEqual( + iri_to_uri('http://правозащита38.рф/category/news/'), + 'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/') + self.assertEqual( + iri_to_uri('http://www.правозащита38.рф/category/news/'), + 'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/') + self.assertEqual( + iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'), + 'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA') + self.assertEqual( + iri_to_uri('http://日本語.jp/'), + 'http://xn--wgv71a119e.jp/') + self.assertEqual( + iri_to_uri('http://导航.中国/'), + 'http://xn--fet810g.xn--fiqs8s/') + + def test_clean_podcast_url(self): + self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') + self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') + self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661') + self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3') + + def test_LazyList(self): + it = list(range(10)) + + self.assertEqual(list(LazyList(it)), it) + self.assertEqual(LazyList(it).exhaust(), it) + self.assertEqual(LazyList(it)[5], it[5]) + + self.assertEqual(LazyList(it)[5:], it[5:]) + self.assertEqual(LazyList(it)[:5], it[:5]) + self.assertEqual(LazyList(it)[::2], it[::2]) + self.assertEqual(LazyList(it)[1::2], it[1::2]) + self.assertEqual(LazyList(it)[5::-1], it[5::-1]) + self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2]) + self.assertEqual(LazyList(it)[::-1], it[::-1]) + + self.assertTrue(LazyList(it)) + self.assertFalse(LazyList(range(0))) + self.assertEqual(len(LazyList(it)), len(it)) + self.assertEqual(repr(LazyList(it)), repr(it)) + self.assertEqual(str(LazyList(it)), str(it)) + + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) + + def test_LazyList_laziness(self): + + def 
test(ll, idx, val, cache): + self.assertEqual(ll[idx], val) + self.assertEqual(ll._cache, list(cache)) + + ll = LazyList(range(10)) + test(ll, 0, 0, range(1)) + test(ll, 5, 5, range(6)) + test(ll, -3, 7, range(10)) + + ll = LazyList(range(10), reverse=True) + test(ll, -1, 0, range(1)) + test(ll, 3, 6, range(10)) + + ll = LazyList(itertools.count()) + test(ll, 10, 10, range(11)) + ll = reversed(ll) + test(ll, -15, 14, range(15)) + + def test_format_bytes(self): + self.assertEqual(format_bytes(0), '0.00B') + self.assertEqual(format_bytes(1000), '1000.00B') + self.assertEqual(format_bytes(1024), '1.00KiB') + self.assertEqual(format_bytes(1024**2), '1.00MiB') + self.assertEqual(format_bytes(1024**3), '1.00GiB') + self.assertEqual(format_bytes(1024**4), '1.00TiB') + self.assertEqual(format_bytes(1024**5), '1.00PiB') + self.assertEqual(format_bytes(1024**6), '1.00EiB') + self.assertEqual(format_bytes(1024**7), '1.00ZiB') + self.assertEqual(format_bytes(1024**8), '1.00YiB') + self.assertEqual(format_bytes(1024**9), '1024.00YiB') + + def test_hide_login_info(self): + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']), + ['-u', 'PRIVATE', '-p', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['-u']), ['-u']) + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']), + ['-u', 'PRIVATE', '-u', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['--username=foo']), + ['--username=PRIVATE']) + + def test_locked_file(self): + TEXT = 'test_locked_file\n' + FILE = 'test_locked_file.ytdl' + MODES = 'war' # Order is important + + try: + for lock_mode in MODES: + with locked_file(FILE, lock_mode, False) as f: + if lock_mode == 'r': + self.assertEqual(f.read(), TEXT * 2, 'Wrong file content') + else: + f.write(TEXT) + for test_mode in MODES: + testing_write = test_mode != 'r' + try: + with locked_file(FILE, test_mode, False): + pass + except (BlockingIOError, PermissionError): + if not testing_write: # FIXME + print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})') + continue + self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}') + else: + self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}') + finally: + with contextlib.suppress(OSError): + os.remove(FILE) + + def test_determine_file_encoding(self): + self.assertEqual(determine_file_encoding(b''), (None, 0)) + self.assertEqual(determine_file_encoding(b'--verbose -x --audio-format mkv\n'), (None, 0)) + + self.assertEqual(determine_file_encoding(b'\xef\xbb\xbf'), ('utf-8', 3)) + self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4)) + self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2)) + + self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2)) + + self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0)) + self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0)) + + self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0)) + self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0)) + + self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0)) + self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0)) + + def test_get_compatible_ext(self): + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None, None], 
vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv')
+        self.assertEqual(get_compatible_ext(
+            vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv')
+
+        self.assertEqual(get_compatible_ext(
+            vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4')
+        self.assertEqual(get_compatible_ext(
+            vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv')
+        self.assertEqual(get_compatible_ext(
+            vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv')
+        self.assertEqual(get_compatible_ext(
+            vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm')
+        self.assertEqual(get_compatible_ext(
+            vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm')
+
+        self.assertEqual(get_compatible_ext(
+            vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4')
+        self.assertEqual(get_compatible_ext(
+            vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm')
+
+        self.assertEqual(get_compatible_ext(
+            vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'], preferences=['flv', 'mp4']), 'mp4')
+        self.assertEqual(get_compatible_ext(
+            vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv')
+
+    def test_try_call(self):
+        def total(*x, **kwargs):
+            return sum(x) + sum(kwargs.values())
+
+        self.assertEqual(try_call(None), None,
+                         msg='not a fn should give None')
+        self.assertEqual(try_call(lambda: 1), 1,
+                         msg='int fn with no expected_type should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
+                         msg='int fn with expected_type int should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with wrong expected_type should give None')
+        self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
+                         msg='fn should accept arglist')
+        self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
+                         msg='fn should accept kwargs')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with wrong expected_type should give None')
+        self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
+                         msg='expect first int result with expected_type int')
+
+    def test_variadic(self):
+        self.assertEqual(variadic(None), (None, ))
+        self.assertEqual(variadic('spam'), ('spam', ))
+        self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+            self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
+
+    def test_traverse_obj(self):
+        _TEST_DATA = {
+            100: 100,
+            1.2: 1.2,
+            'str': 'str',
+            'None': None,
+            '...': ...,
+            'urls': [
+                {'index': 0, 'url': 'https://www.example.com/0'},
+                {'index': 1, 'url': 'https://www.example.com/1'},
+            ],
+            'data': (
+                {'index': 2},
+                {'index': 3},
+            ),
+            'dict': {},
+        }
+
+        # Test base functionality
+        self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
+                         msg='allow tuple path')
+        self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
+                         msg='allow list path')
+        self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
+                         msg='allow iterable path')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
+                         msg='single items should be treated as a path')
+        self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
+        self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
+        self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
+
+        # Test Ellipsis behavior
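+        # A hedged illustration of the semantics pinned down below (hypothetical
+        # data, not part of the upstream assertions): `...` branches into every
+        # value at the current level, so for obj = {'a': {'x': 1}, 'b': {'x': 2}}
+        # the query traverse_obj(obj, (..., 'x')) collects [1, 2].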
+        self.assertCountEqual(traverse_obj(_TEST_DATA, ...),
+                              (item for item in _TEST_DATA.values() if item not in (None, {})),
+                              msg='`...` should give all non-discarded values')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(),
+                              msg='`...` selection for dicts should select all values')
+        self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')),
+                         ['https://www.example.com/0', 'https://www.example.com/1'],
+                         msg='nested `...` queries should work')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
+                              msg='`...` query result should be flattened')
+        self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
+                         msg='`...` should accept iterables')
+
+        # Test function as key
+        self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
+                         [_TEST_DATA['urls']],
+                         msg='function as query key should perform a filter based on (key, value)')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
+                              msg='exceptions in the query function should be caught')
+        self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
+                         msg='function key should accept iterables')
+        if __debug__:
+            with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+                traverse_obj(_TEST_DATA, lambda a: ...)
+            with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
+                traverse_obj(_TEST_DATA, lambda a, b, c: ...)
+
+        # Test set as key (transformation/type, like `expected_type`)
+        self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper}, )), ['STR'],
+                         msg='Function in set should be a transformation')
+        self.assertEqual(traverse_obj(_TEST_DATA, (..., {str})), ['str'],
+                         msg='Type in set should be a type filter')
+        self.assertEqual(traverse_obj(_TEST_DATA, {dict}), _TEST_DATA,
+                         msg='A single set should be wrapped into a path')
+        self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper})), ['STR'],
+                         msg='Transformation function should not raise')
+        self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})),
+                         [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
+                         msg='Function in set should be a transformation')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})), 'const',
+                         msg='Function in set should always be called')
+        if __debug__:
+            with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+                traverse_obj(_TEST_DATA, set())
+            with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
+                traverse_obj(_TEST_DATA, {str.upper, str})
+
+        # Test `slice` as a key
+        _SLICE_DATA = [0, 1, 2, 3, 4]
+        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
+                         msg='slice on a dictionary should not throw')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
+                         msg='slice key should apply slice to sequence')
+        self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
+                         msg='slice key should apply slice to sequence')
+
+        # Test alternative paths
+        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
+                         msg='multiple `paths` should be treated as alternative paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
+                         msg='alternatives should exit early')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
+                         msg='alternatives should return `default` if exhausted')
+        self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100,
+                         msg='alternatives should track their own branching return')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']),
+                         msg='alternatives on empty objects should search further')
+
+        # Test branch and path nesting
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
+                         msg='tuple as key should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
+                         msg='list as key should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
+                         msg='double nesting in path should be treated as paths')
+        self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
+                         msg='do not fail early on branching')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
+                              ['https://www.example.com/0', 'https://www.example.com/1'],
+                              msg='triple nesting in path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))),
+                         ['https://www.example.com/0', 'https://www.example.com/1'],
+                         msg='ellipsis as branch path start gets flattened')
+
+        # Test dictionary as key
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
+                         msg='dict key should result in a dict with the same keys')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
+                         {0: 'https://www.example.com/0'},
+                         msg='dict key should allow paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
+                         {0: ['https://www.example.com/0']},
+                         msg='tuple in dict path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
+                         {0: ['https://www.example.com/0']},
+                         msg='double nesting in dict path should be treated as paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
+                         {0: ['https://www.example.com/1', 'https://www.example.com/0']},
+                         msg='triple nesting in dict path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
+                         msg='remove `None` values when top-level dict key fails')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...},
+                         msg='use `default` if key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
+                         msg='remove empty values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: ...},
+                         msg='use `default` when dict key and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
+                         msg='remove empty values when nested dict key fails')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
+                         msg='default to dict if pruned')
+        self.assertEqual(traverse_obj(None, {0: 'fail'}, default=...), {0: ...},
+                         msg='default to dict if pruned and default is given')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...), {0: {0: ...}},
+                         msg='use nested `default` when nested dict key fails and `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {},
+                         msg='remove key if branch in dict key not successful')
+
+        # Testing default parameter behavior
+        _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
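+        # A short sketch of the rules the assertions below pin down (hypothetical
+        # calls, not upstream): a non-branching miss yields `default` (`None`
+        # unless given), while a branching miss yields `[]` unless a `default`
+        # is passed explicitly:
+        #   traverse_obj({}, 'missing', default=0)  == 0
+        #   traverse_obj({}, ('missing', ...))      == []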
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
+                         msg='default value should be `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ...,
+                         msg='chained fails should result in default')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
+                         msg='should not short-circuit on `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
+                         msg='invalid dict key should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
+                         msg='`None` is a deliberate sentinel and should become `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
+                         msg='`IndexError` should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
+                         msg='if branched but not successful return `default` if defined, not `[]`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None,
+                         msg='if branched but not successful return `default` even if `default` is `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [],
+                         msg='if branched but not successful return `[]`, not `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [],
+                         msg='if branched but object is empty return `[]`, not `default`')
+        self.assertEqual(traverse_obj(None, ...), [],
+                         msg='if branched but object is `None` return `[]`, not `default`')
+        self.assertEqual(traverse_obj({0: None}, (0, ...)), [],
+                         msg='if branched but state is `None` return `[]`, not `default`')
+
+        branching_paths = [
+            ('fail', ...),
+            (..., 'fail'),
+            100 * ('fail',) + (...,),
+            (...,) + 100 * ('fail',),
+        ]
+        for branching_path in branching_paths:
+            self.assertEqual(traverse_obj({}, branching_path), [],
+                             msg='if branched but state is `None`, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
+                             msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
+            self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
+                             msg='if branching in last alternative and previous did match, return single value')
+            self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
+                             msg='if branching in first alternative and non-branching path does match, return single value')
+            self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
+                             msg='if branching in first alternative and non-branching path does not match, return `default`')
+
+        # Testing expected_type behavior
+        _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
+                         'str', msg='accept matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
+                         None, msg='reject non-matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
+                         '0', msg='transform type using type function')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
+                         None, msg='wrap expected_type function in try_call')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str),
+                         ['str'], msg='eliminate items that expected_type fails on')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
+                         {0: 100}, msg='type as expected_type should filter dict values')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
+                         {0: '100', 1: '1.2'},
msg='function as expected_type should transform dict values') + self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int), + 1, msg='expected_type should not filter non final dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int), + {0: {0: 100}}, msg='expected_type should transform deep dict values') + self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)), + [{0: ...}, {0: ...}], msg='expected_type should transform branched dict values') + self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int), + [4], msg='expected_type regression for type matching in tuple branching') + self.assertEqual(traverse_obj(_TEST_DATA, ['data', ...], expected_type=int), + [], msg='expected_type regression for type matching in dict result') + + # Test get_all behavior + _GET_ALL_DATA = {'key': [0, 1, 2]} + self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0, + msg='if not `get_all`, return only first matching value') + self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2], + msg='do not overflatten if not `get_all`') + + # Test casesense behavior + _CASESENSE_DATA = { + 'KeY': 'value0', + 0: { + 'KeY': 'value1', + 0: {'KeY': 'value2'}, + }, + } + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None, + msg='dict keys should be case sensitive unless `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY', + casesense=False), 'value0', + msg='allow non matching key case if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)), + casesense=False), ['value1'], + msg='allow non matching key case in branch if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)), + casesense=False), ['value2'], + msg='allow non matching key case in branch path if `casesense`') + + # Test traverse_string behavior + _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2} + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None, + msg='do not traverse into string if not `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), + traverse_string=True), 's', + msg='traverse into string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), + traverse_string=True), '.', + msg='traverse into converted data if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...), + traverse_string=True), 'str', + msg='`...` should result in string (same value) if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), + traverse_string=True), 'sr', + msg='`slice` should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), + traverse_string=True), 'str', + msg='function should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), + traverse_string=True), ['s', 'r'], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [], + msg='branching should result in list if `traverse_string`') + self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [], + msg='branching should result in list if 
`traverse_string`')
+
+        # Test re.Match as input obj
+        mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
+        self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
+                         msg='`...` on a `re.Match` should give its `groups()`')
+        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
+                         msg='function on a `re.Match` should give groupno, value starting at 0')
+        self.assertEqual(traverse_obj(mobj, 'group'), '3',
+                         msg='str key on a `re.Match` should give group with that name')
+        self.assertEqual(traverse_obj(mobj, 2), '3',
+                         msg='int key on a `re.Match` should give group with that name')
+        self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
+                         msg='str key on a `re.Match` should respect casesense')
+        self.assertEqual(traverse_obj(mobj, 'fail'), None,
+                         msg='failing str key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
+                         msg='failing str key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, 8), None,
+                         msg='failing int key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
+                         msg='function on a `re.Match` should give group name as well')
+
+        # Test xml.etree.ElementTree.Element as input obj
+        etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
+        <data>
+            <country name="Liechtenstein">
+                <rank>1</rank>
+                <year>2008</year>
+                <gdppc>141100</gdppc>
+                <neighbor name="Austria" direction="E"/>
+                <neighbor name="Switzerland" direction="W"/>
+            </country>
+            <country name="Singapore">
+                <rank>4</rank>
+                <year>2011</year>
+                <gdppc>59900</gdppc>
+                <neighbor name="Malaysia" direction="N"/>
+            </country>
+            <country name="Panama">
+                <rank>68</rank>
+                <year>2011</year>
+                <gdppc>13600</gdppc>
+                <neighbor name="Costa Rica" direction="W"/>
+                <neighbor name="Colombia" direction="E"/>
+            </country>
+        </data>''')
+        self.assertEqual(traverse_obj(etree, ''), etree,
+                         msg='empty str key should return the element itself')
+        self.assertEqual(traverse_obj(etree, 'country'), list(etree),
+                         msg='str key should yield all children with that tag name')
+        self.assertEqual(traverse_obj(etree, ...), list(etree),
+                         msg='`...` as key should return all children')
+        self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
+                         msg='function as key should get element as value')
+        self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
+                         msg='function as key should get index as key')
+        self.assertEqual(traverse_obj(etree, 0), etree[0],
+                         msg='int key should return the nth child')
+        self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
+                         ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
+                         msg='`@<attribute>` at end of path should give that attribute')
+        self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
+                         msg='`@<nonexistent>` at end of path should give `None`')
+        self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
+                         msg='`@` should give the full attribute dict')
+        self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
+                         msg='`text()` at end of path should give the inner text')
+        self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
+                         msg='full Python XPath features should be supported')
+        self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
+                         msg='special 
transformations should act on current element') + self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100], + msg='special transformations should act on current element') + + def test_http_header_dict(self): + headers = HTTPHeaderDict() + headers['ytdl-test'] = b'0' + self.assertEqual(list(headers.items()), [('Ytdl-Test', '0')]) + headers['ytdl-test'] = 1 + self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')]) + headers['Ytdl-test'] = '2' + self.assertEqual(list(headers.items()), [('Ytdl-Test', '2')]) + self.assertTrue('ytDl-Test' in headers) + self.assertEqual(str(headers), str(dict(headers))) + self.assertEqual(repr(headers), str(dict(headers))) + + headers.update({'X-dlp': 'data'}) + self.assertEqual(set(headers.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')}) + self.assertEqual(dict(headers), {'Ytdl-Test': '2', 'X-Dlp': 'data'}) + self.assertEqual(len(headers), 2) + self.assertEqual(headers.copy(), headers) + headers2 = HTTPHeaderDict({'X-dlp': 'data3'}, **headers, **{'X-dlp': 'data2'}) + self.assertEqual(set(headers2.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')}) + self.assertEqual(len(headers2), 2) + headers2.clear() + self.assertEqual(len(headers2), 0) + + # ensure we prefer latter headers + headers3 = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2}) + self.assertEqual(set(headers3.items()), {('Ytdl-Test', '2')}) + del headers3['ytdl-tesT'] + self.assertEqual(dict(headers3), {}) + + headers4 = HTTPHeaderDict({'ytdl-test': 'data;'}) + self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')}) + + # common mistake: strip whitespace from values + # https://github.com/yt-dlp/yt-dlp/issues/8729 + headers5 = HTTPHeaderDict({'ytdl-test': ' data; '}) + self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')}) + + def test_extract_basic_auth(self): + assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None) + assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None) + assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==') + assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=') + assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') + assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') + + @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') + def test_Popen_windows_escaping(self): + def run_shell(args): + stdout, stderr, error = Popen.run( + args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + assert not stderr + assert not error + return stdout + + # Test escaping + assert run_shell(['echo', 'test"&']) == '"test""&"\n' + # Test if delayed expansion is disabled + assert run_shell(['echo', '^!']) == '"^!"\n' + assert run_shell('echo "^!"') == '"^!"\n' + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py new file mode 100644 index 0000000..21ce10a --- /dev/null +++ b/test/test_verbose_output.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import subprocess + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +class TestVerboseOutput(unittest.TestCase): + def test_private_info_arg(self): + outp = subprocess.Popen( + [ + sys.executable, 'yt_dlp/__main__.py', + 
'-v', '--ignore-config', + '--username', 'johnsmith@gmail.com', + '--password', 'my_secret_password', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue(b'--username' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'--password' in serr) + self.assertTrue(b'my_secret_password' not in serr) + + def test_private_info_shortarg(self): + outp = subprocess.Popen( + [ + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', + '-u', 'johnsmith@gmail.com', + '-p', 'my_secret_password', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue(b'-u' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'-p' in serr) + self.assertTrue(b'my_secret_password' not in serr) + + def test_private_info_eq(self): + outp = subprocess.Popen( + [ + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', + '--username=johnsmith@gmail.com', + '--password=my_secret_password', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue(b'--username' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'--password' in serr) + self.assertTrue(b'my_secret_password' not in serr) + + def test_private_info_shortarg_eq(self): + outp = subprocess.Popen( + [ + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', + '-u=johnsmith@gmail.com', + '-p=my_secret_password', + ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = outp.communicate() + self.assertTrue(b'-u' in serr) + self.assertTrue(b'johnsmith' not in serr) + self.assertTrue(b'-p' in serr) + self.assertTrue(b'my_secret_password' not in serr) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_websockets.py b/test/test_websockets.py new file mode 100644 index 0000000..13b3a1e --- /dev/null +++ b/test/test_websockets.py @@ -0,0 +1,383 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +import pytest + +from test.helper import verify_address_availability + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import http.client +import http.cookiejar +import http.server +import json +import random +import ssl +import threading + +from yt_dlp import socks +from yt_dlp.cookies import YoutubeDLCookieJar +from yt_dlp.dependencies import websockets +from yt_dlp.networking import Request +from yt_dlp.networking.exceptions import ( + CertificateVerifyError, + HTTPError, + ProxyError, + RequestError, + SSLError, + TransportError, +) +from yt_dlp.utils.networking import HTTPHeaderDict + +from test.conftest import validate_and_send + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def websocket_handler(websocket): + for message in websocket: + if isinstance(message, bytes): + if message == b'bytes': + return websocket.send('2') + elif isinstance(message, str): + if message == 'headers': + return websocket.send(json.dumps(dict(websocket.request.headers))) + elif message == 'path': + return websocket.send(websocket.request.path) + elif message == 'source_address': + return websocket.send(websocket.remote_address[0]) + elif message == 'str': + return websocket.send('1') + return websocket.send(message) + + +def process_request(self, request): + if request.path.startswith('/gen_'): + status = http.HTTPStatus(int(request.path[5:])) + if 300 <= status.value <= 300: + return 
websockets.http11.Response( + status.value, status.phrase, websockets.datastructures.Headers([('Location', '/')]), b'') + return self.protocol.reject(status.value, status.phrase) + return self.protocol.accept(request) + + +def create_websocket_server(**ws_kwargs): + import websockets.sync.server + wsd = websockets.sync.server.serve(websocket_handler, '127.0.0.1', 0, process_request=process_request, **ws_kwargs) + ws_port = wsd.socket.getsockname()[1] + ws_server_thread = threading.Thread(target=wsd.serve_forever) + ws_server_thread.daemon = True + ws_server_thread.start() + return ws_server_thread, ws_port + + +def create_ws_websocket_server(): + return create_websocket_server() + + +def create_wss_websocket_server(): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + return create_websocket_server(ssl_context=sslctx) + + +MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate') + + +def create_mtls_wss_websocket_server(): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + cacertfn = os.path.join(MTLS_CERT_DIR, 'ca.crt') + + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.verify_mode = ssl.CERT_REQUIRED + sslctx.load_verify_locations(cafile=cacertfn) + sslctx.load_cert_chain(certfn, None) + + return create_websocket_server(ssl_context=sslctx) + + +@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers') +class TestWebsSocketRequestHandlerConformance: + @classmethod + def setup_class(cls): + cls.ws_thread, cls.ws_port = create_ws_websocket_server() + cls.ws_base_url = f'ws://127.0.0.1:{cls.ws_port}' + + cls.wss_thread, cls.wss_port = create_wss_websocket_server() + cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}' + + cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl_context=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)) + cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}' + + cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server() + cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}' + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_basic_websockets(self, handler): + with handler() as rh: + ws = validate_and_send(rh, Request(self.ws_base_url)) + assert 'upgrade' in ws.headers + assert ws.status == 101 + ws.send('foo') + assert ws.recv() == 'foo' + ws.close() + + # https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6 + @pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)]) + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_send_types(self, handler, msg, opcode): + with handler() as rh: + ws = validate_and_send(rh, Request(self.ws_base_url)) + ws.send(msg) + assert int(ws.recv()) == opcode + ws.close() + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_verify_cert(self, handler): + with handler() as rh: + with pytest.raises(CertificateVerifyError): + validate_and_send(rh, Request(self.wss_base_url)) + + with handler(verify=False) as rh: + ws = validate_and_send(rh, Request(self.wss_base_url)) + assert ws.status == 101 + ws.close() + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_ssl_error(self, handler): + with handler(verify=False) as rh: + with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: + validate_and_send(rh, Request(self.bad_wss_host)) + assert not issubclass(exc_info.type, 
CertificateVerifyError) + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + @pytest.mark.parametrize('path,expected', [ + # Unicode characters should be encoded with uppercase percent-encoding + ('/中文', '/%E4%B8%AD%E6%96%87'), + # don't normalize existing percent encodings + ('/%c7%9f', '/%c7%9f'), + ]) + def test_percent_encode(self, handler, path, expected): + with handler() as rh: + ws = validate_and_send(rh, Request(f'{self.ws_base_url}{path}')) + ws.send('path') + assert ws.recv() == expected + assert ws.status == 101 + ws.close() + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_remove_dot_segments(self, handler): + with handler() as rh: + # This isn't a comprehensive test, + # but it should be enough to check whether the handler is removing dot segments + ws = validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test')) + assert ws.status == 101 + ws.send('path') + assert ws.recv() == '/test' + ws.close() + + # We are restricted to known HTTP status codes in http.HTTPStatus + # Redirects are not supported for websockets + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + @pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511)) + def test_raise_http_error(self, handler, status): + with handler() as rh: + with pytest.raises(HTTPError) as exc_info: + validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}')) + assert exc_info.value.status == status + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + @pytest.mark.parametrize('params,extensions', [ + ({'timeout': sys.float_info.min}, {}), + ({}, {'timeout': sys.float_info.min}), + ]) + def test_timeout(self, handler, params, extensions): + with handler(**params) as rh: + with pytest.raises(TransportError): + validate_and_send(rh, Request(self.ws_base_url, extensions=extensions)) + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_cookies(self, handler): + cookiejar = YoutubeDLCookieJar() + cookiejar.set_cookie(http.cookiejar.Cookie( + version=0, name='test', value='ytdlp', port=None, port_specified=False, + domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/', + path_specified=True, secure=False, expires=None, discard=False, comment=None, + comment_url=None, rest={})) + + with handler(cookiejar=cookiejar) as rh: + ws = validate_and_send(rh, Request(self.ws_base_url)) + ws.send('headers') + assert json.loads(ws.recv())['cookie'] == 'test=ytdlp' + ws.close() + + with handler() as rh: + ws = validate_and_send(rh, Request(self.ws_base_url)) + ws.send('headers') + assert 'cookie' not in json.loads(ws.recv()) + ws.close() + + ws = validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar})) + ws.send('headers') + assert json.loads(ws.recv())['cookie'] == 'test=ytdlp' + ws.close() + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_source_address(self, handler): + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(source_address=source_address) as rh: + ws = validate_and_send(rh, Request(self.ws_base_url)) + ws.send('source_address') + assert source_address == ws.recv() + ws.close() + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_response_url(self, handler): + with handler() as rh: + url = f'{self.ws_base_url}/something' + ws = validate_and_send(rh, Request(url)) + assert ws.url == url + ws.close() + + 
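# The remaining tests in this class follow the same pattern as
+    # test_basic_websockets above: open a handler, validate_and_send() a
+    # Request, exchange frames on the returned connection, then close it, e.g.
+    #     with handler() as rh:
+    #         ws = validate_and_send(rh, Request(self.ws_base_url))
+    #         ws.send('foo')
+    #         assert ws.recv() == 'foo'
+    #         ws.close()
+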
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_request_headers(self, handler): + with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: + # Global Headers + ws = validate_and_send(rh, Request(self.ws_base_url)) + ws.send('headers') + headers = HTTPHeaderDict(json.loads(ws.recv())) + assert headers['test1'] == 'test' + ws.close() + + # Per request headers, merged with global + ws = validate_and_send(rh, Request( + self.ws_base_url, headers={'test2': 'changed', 'test3': 'test3'})) + ws.send('headers') + headers = HTTPHeaderDict(json.loads(ws.recv())) + assert headers['test1'] == 'test' + assert headers['test2'] == 'changed' + assert headers['test3'] == 'test3' + ws.close() + + @pytest.mark.parametrize('client_cert', ( + {'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithkey.crt')}, + { + 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'), + 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'client.key'), + }, + { + 'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithencryptedkey.crt'), + 'client_certificate_password': 'foobar', + }, + { + 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'), + 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'clientencrypted.key'), + 'client_certificate_password': 'foobar', + } + )) + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_mtls(self, handler, client_cert): + with handler( + # Disable client-side validation of unacceptable self-signed testcert.pem + # The test is of a check on the server side, so unaffected + verify=False, + client_cert=client_cert + ) as rh: + validate_and_send(rh, Request(self.mtls_wss_base_url)).close() + + +def create_fake_ws_connection(raised): + import websockets.sync.client + + class FakeWsConnection(websockets.sync.client.ClientConnection): + def __init__(self, *args, **kwargs): + class FakeResponse: + body = b'' + headers = {} + status_code = 101 + reason_phrase = 'test' + + self.response = FakeResponse() + + def send(self, *args, **kwargs): + raise raised() + + def recv(self, *args, **kwargs): + raise raised() + + def close(self, *args, **kwargs): + return + + return FakeWsConnection() + + +@pytest.mark.parametrize('handler', ['Websockets'], indirect=True) +class TestWebsocketsRequestHandler: + @pytest.mark.parametrize('raised,expected', [ + # https://websockets.readthedocs.io/en/stable/reference/exceptions.html + (lambda: websockets.exceptions.InvalidURI(msg='test', uri='test://'), RequestError), + # Requires a response object. Should be covered by HTTP error tests. 
+ # (lambda: websockets.exceptions.InvalidStatus(), TransportError), + (lambda: websockets.exceptions.InvalidHandshake(), TransportError), + # These are subclasses of InvalidHandshake + (lambda: websockets.exceptions.InvalidHeader(name='test'), TransportError), + (lambda: websockets.exceptions.NegotiationError(), TransportError), + # Catch-all + (lambda: websockets.exceptions.WebSocketException(), TransportError), + (lambda: TimeoutError(), TransportError), + # These may be raised by our create_connection implementation, which should also be caught + (lambda: OSError(), TransportError), + (lambda: ssl.SSLError(), SSLError), + (lambda: ssl.SSLCertVerificationError(), CertificateVerifyError), + (lambda: socks.ProxyError(), ProxyError), + ]) + def test_request_error_mapping(self, handler, monkeypatch, raised, expected): + import websockets.sync.client + + import yt_dlp.networking._websockets + with handler() as rh: + def fake_connect(*args, **kwargs): + raise raised() + monkeypatch.setattr(yt_dlp.networking._websockets, 'create_connection', lambda *args, **kwargs: None) + monkeypatch.setattr(websockets.sync.client, 'connect', fake_connect) + with pytest.raises(expected) as exc_info: + rh.send(Request('ws://fake-url')) + assert exc_info.type is expected + + @pytest.mark.parametrize('raised,expected,match', [ + # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send + (lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None), + (lambda: RuntimeError(), TransportError, None), + (lambda: TimeoutError(), TransportError, None), + (lambda: TypeError(), RequestError, None), + (lambda: socks.ProxyError(), ProxyError, None), + # Catch-all + (lambda: websockets.exceptions.WebSocketException(), TransportError, None), + ]) + def test_ws_send_error_mapping(self, handler, monkeypatch, raised, expected, match): + from yt_dlp.networking._websockets import WebsocketsResponseAdapter + ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url') + with pytest.raises(expected, match=match) as exc_info: + ws.send('test') + assert exc_info.type is expected + + @pytest.mark.parametrize('raised,expected,match', [ + # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv + (lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None), + (lambda: RuntimeError(), TransportError, None), + (lambda: TimeoutError(), TransportError, None), + (lambda: socks.ProxyError(), ProxyError, None), + # Catch-all + (lambda: websockets.exceptions.WebSocketException(), TransportError, None), + ]) + def test_ws_recv_error_mapping(self, handler, monkeypatch, raised, expected, match): + from yt_dlp.networking._websockets import WebsocketsResponseAdapter + ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url') + with pytest.raises(expected, match=match) as exc_info: + ws.recv() + assert exc_info.type is expected diff --git a/test/test_write_annotations.py.disabled b/test/test_write_annotations.py.disabled new file mode 100644 index 0000000..c7cf199 --- /dev/null +++ b/test/test_write_annotations.py.disabled @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import xml.etree.ElementTree + +import yt_dlp.extractor +import yt_dlp.YoutubeDL +from test.helper import get_params, 
is_download_test, try_rm + + +class YoutubeDL(yt_dlp.YoutubeDL): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.to_stderr = self.to_screen + + +params = get_params({ + 'writeannotations': True, + 'skip_download': True, + 'writeinfojson': False, + 'format': 'flv', +}) + + +TEST_ID = 'gr51aVj-mLg' +ANNOTATIONS_FILE = TEST_ID + '.annotations.xml' +EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] + + +@is_download_test +class TestAnnotations(unittest.TestCase): + def setUp(self): + # Clear old files + self.tearDown() + + def test_info_json(self): + expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text. + ie = yt_dlp.extractor.YoutubeIE() + ydl = YoutubeDL(params) + ydl.add_info_extractor(ie) + ydl.download([TEST_ID]) + self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) + annoxml = None + with open(ANNOTATIONS_FILE, encoding='utf-8') as annof: + annoxml = xml.etree.ElementTree.parse(annof) + self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') + root = annoxml.getroot() + self.assertEqual(root.tag, 'document') + annotationsTag = root.find('annotations') + self.assertEqual(annotationsTag.tag, 'annotations') + annotations = annotationsTag.findall('annotation') + + # Not all the annotations have TEXT children and the annotations are returned unsorted. + for a in annotations: + self.assertEqual(a.tag, 'annotation') + if a.get('type') == 'text': + textTag = a.find('TEXT') + text = textTag.text + self.assertTrue(text in expected) # assertIn only added in python 2.7 + # remove the first occurrence, there could be more than one annotation with the same text + expected.remove(text) + # We should have seen (and removed) all the expected annotation texts. 
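+        # (Matched texts are removed from `expected` as they are seen, so an
+        # empty list here means every expected annotation text was matched.)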
+ self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') + + def tearDown(self): + try_rm(ANNOTATIONS_FILE) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py new file mode 100644 index 0000000..b3f323e --- /dev/null +++ b/test/test_youtube_lists.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import FakeYDL, is_download_test +from yt_dlp.extractor import YoutubeIE, YoutubeTabIE +from yt_dlp.utils import ExtractorError + + +@is_download_test +class TestYoutubeLists(unittest.TestCase): + def assertIsPlaylist(self, info): + """Make sure the info has '_type' set to 'playlist'""" + self.assertEqual(info['_type'], 'playlist') + + def test_youtube_playlist_noplaylist(self): + dl = FakeYDL() + dl.params['noplaylist'] = True + ie = YoutubeTabIE(dl) + result = ie.extract('https://www.youtube.com/watch?v=OmJ-4B-mS-Y&list=PLydZ2Hrp_gPRJViZjLFKaBMgCQOYEEkyp&index=2') + self.assertEqual(result['_type'], 'url') + self.assertEqual(result['ie_key'], YoutubeIE.ie_key()) + self.assertEqual(YoutubeIE.extract_id(result['url']), 'OmJ-4B-mS-Y') + + def test_youtube_mix(self): + dl = FakeYDL() + ie = YoutubeTabIE(dl) + result = ie.extract('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8') + entries = list(result['entries']) + self.assertTrue(len(entries) >= 50) + original_video = entries[0] + self.assertEqual(original_video['id'], 'tyITL_exICo') + + def test_youtube_flat_playlist_extraction(self): + dl = FakeYDL() + dl.params['extract_flat'] = True + ie = YoutubeTabIE(dl) + result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc') + self.assertIsPlaylist(result) + entries = list(result['entries']) + self.assertTrue(len(entries) == 1) + video = entries[0] + self.assertEqual(video['_type'], 'url') + self.assertEqual(video['ie_key'], 'Youtube') + self.assertEqual(video['id'], 'BaW_jenozKc') + self.assertEqual(video['url'], 'https://www.youtube.com/watch?v=BaW_jenozKc') + self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐') + self.assertEqual(video['duration'], 10) + self.assertEqual(video['uploader'], 'Philipp Hagemeister') + + def test_youtube_channel_no_uploads(self): + dl = FakeYDL() + dl.params['extract_flat'] = True + ie = YoutubeTabIE(dl) + # no uploads + with self.assertRaisesRegex(ExtractorError, r'no uploads'): + ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA') + + # no uploads and no UCID given + with self.assertRaisesRegex(ExtractorError, r'no uploads'): + ie.extract('https://www.youtube.com/news') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py new file mode 100644 index 0000000..81be5d3 --- /dev/null +++ b/test/test_youtube_misc.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from yt_dlp.extractor import YoutubeIE + + +class TestYoutubeMisc(unittest.TestCase): + def test_youtube_extract(self): + assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) + assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + 
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') + assertExtractId('BaW_jenozKc', 'BaW_jenozKc') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py new file mode 100644 index 0000000..c559284 --- /dev/null +++ b/test/test_youtube_signature.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys +import unittest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import contextlib +import re +import string +import urllib.request + +from test.helper import FakeYDL, is_download_test +from yt_dlp.extractor import YoutubeIE +from yt_dlp.jsinterp import JSInterpreter + +_SIG_TESTS = [ + ( + 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', + 86, + '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', + 85, + '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', + 90, + ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', + 84, + 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', + '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', + 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', + 84, + '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', + 83, + '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', + '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', + '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' + ), + ( + 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', + '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', + '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', + ), + ( + 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', + ), +] + +_NSIG_TESTS = [ + ( + 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js', + 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg', + ), + ( + 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', + 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', + ), + ( + 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', + 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', + ), + ( + 
'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', + 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', + ), + ( + 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js', + 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q', + ), + ( + 'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js', + 'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw', + ), + ( + 'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js', + 'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw', + ), + ( + 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js', + 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA', + ), + ( + 'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js', + 'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA', + ), + ( + 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', + 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw', + ), + ( + 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js', + 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg', + ), + ( + 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', + 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', + ), + ( + 'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js', + '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw', + ), + ( + 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js', + '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ', + ), + ( + 'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js', + 'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg', + ), + ( + 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js', + 'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA', + ), + ( + 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', + 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', + ), + ( + 'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js', + 'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w', + ), + ( + 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js', + 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A', + ), + ( + 'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js', + 'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw', + ), + ( + 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js', + 'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w', + ), + ( + 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js', + '1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A', + ), + ( + 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', + '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', + ), +] + + +@is_download_test +class TestPlayerInfo(unittest.TestCase): + def test_youtube_extract_player_info(self): + PLAYER_URLS = ( + ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'), + ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), + ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'), + ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'), + ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'), + ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'), + # obsolete + ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'), + ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'), + 
('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'), + ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'), + ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'), + ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'), + ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'), + ) + for player_url, expected_player_id in PLAYER_URLS: + player_id = YoutubeIE._extract_player_info(player_url) + self.assertEqual(player_id, expected_player_id) + + +@is_download_test +class TestSignature(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs') + if not os.path.exists(self.TESTDATA_DIR): + os.mkdir(self.TESTDATA_DIR) + + def tearDown(self): + with contextlib.suppress(OSError): + for f in os.listdir(self.TESTDATA_DIR): + os.remove(os.path.join(self.TESTDATA_DIR, f)) + + +def t_factory(name, sig_func, url_pattern): + def make_tfunc(url, sig_input, expected_sig): + m = url_pattern.match(url) + assert m, '%r should follow URL format' % url + test_id = m.group('id') + + def test_func(self): + basename = f'player-{name}-{test_id}.js' + fn = os.path.join(self.TESTDATA_DIR, basename) + + if not os.path.exists(fn): + urllib.request.urlretrieve(url, fn) + with open(fn, encoding='utf-8') as testf: + jscode = testf.read() + self.assertEqual(sig_func(jscode, sig_input), expected_sig) + + test_func.__name__ = f'test_{name}_js_{test_id}' + setattr(TestSignature, test_func.__name__, test_func) + return make_tfunc + + +def signature(jscode, sig_input): + func = YoutubeIE(FakeYDL())._parse_sig_js(jscode) + src_sig = ( + str(string.printable[:sig_input]) + if isinstance(sig_input, int) else sig_input) + return func(src_sig) + + +def n_sig(jscode, sig_input): + funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) + return JSInterpreter(jscode).call_function(funcname, sig_input) + + +make_sig_test = t_factory( + 'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$')) +for test_spec in _SIG_TESTS: + make_sig_test(*test_spec) + +make_nsig_test = t_factory( + 'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+\.js$')) +for test_spec in _NSIG_TESTS: + make_nsig_test(*test_spec) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/testcert.pem b/test/testcert.pem new file mode 100644 index 0000000..b3e0f00 --- /dev/null +++ b/test/testcert.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB +HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp +Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h +A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev +mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J +aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP +tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54 +BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4 +Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd +63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk +ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE +8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw +XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA +G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
+zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN +8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP +gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ +XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28 +Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/ +98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ +1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S +DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN +Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB +FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD +pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z +VJua1Wn8HKxnXMI61DhTCSo= +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD +VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG +A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl +c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw +aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw +CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb +MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs +IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh +Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA +zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU +YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq +O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8 +A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh +KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/ +Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd +ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD +AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x +5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb +PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6 +cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu +SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe +Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK +-----END CERTIFICATE----- diff --git a/test/testdata/certificate/ca.crt b/test/testdata/certificate/ca.crt new file mode 100644 index 0000000..ddf7be7 --- /dev/null +++ b/test/testdata/certificate/ca.crt @@ -0,0 +1,10 @@ +-----BEGIN CERTIFICATE----- +MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw +FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz +MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D +AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU +KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F +iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud +EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2 +m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1 +-----END CERTIFICATE----- diff --git a/test/testdata/certificate/ca.key b/test/testdata/certificate/ca.key new file mode 100644 index 0000000..38920d5 --- /dev/null +++ b/test/testdata/certificate/ca.key @@ -0,0 +1,5 @@ +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49 +AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH +YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/ca.srl 
b/test/testdata/certificate/ca.srl new file mode 100644 index 0000000..de2d1ea --- /dev/null +++ b/test/testdata/certificate/ca.srl @@ -0,0 +1 @@ +4A260C33C4D34612646E6321E1E767DF1A95EF0B diff --git a/test/testdata/certificate/client.crt b/test/testdata/certificate/client.crt new file mode 100644 index 0000000..874622f --- /dev/null +++ b/test/testdata/certificate/client.crt @@ -0,0 +1,9 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- diff --git a/test/testdata/certificate/client.csr b/test/testdata/certificate/client.csr new file mode 100644 index 0000000..2d5d7a5 --- /dev/null +++ b/test/testdata/certificate/client.csr @@ -0,0 +1,7 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG +SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq +3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA +1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq +6kiYq8Oxx6ZMoI+11k75/Kip1g== +-----END CERTIFICATE REQUEST----- diff --git a/test/testdata/certificate/client.key b/test/testdata/certificate/client.key new file mode 100644 index 0000000..e47389b --- /dev/null +++ b/test/testdata/certificate/client.key @@ -0,0 +1,5 @@ +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 +AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird +m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientencrypted.key b/test/testdata/certificate/clientencrypted.key new file mode 100644 index 0000000..0baee37 --- /dev/null +++ b/test/testdata/certificate/clientencrypted.key @@ -0,0 +1,8 @@ +-----BEGIN EC PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 + +96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS +rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn +IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientwithencryptedkey.crt b/test/testdata/certificate/clientwithencryptedkey.crt new file mode 100644 index 0000000..f357e4c --- /dev/null +++ b/test/testdata/certificate/clientwithencryptedkey.crt @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- +-----BEGIN EC PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 + +96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS +rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn +IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientwithkey.crt 
b/test/testdata/certificate/clientwithkey.crt new file mode 100644 index 0000000..942f6e2 --- /dev/null +++ b/test/testdata/certificate/clientwithkey.crt @@ -0,0 +1,14 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 +AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird +m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/instructions.md b/test/testdata/certificate/instructions.md new file mode 100644 index 0000000..b0e3fbd --- /dev/null +++ b/test/testdata/certificate/instructions.md @@ -0,0 +1,19 @@ +# Generate certificates for client cert tests + +## CA +```sh +openssl ecparam -name prime256v1 -genkey -noout -out ca.key +openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest" +``` + +## Client +```sh +openssl ecparam -name prime256v1 -genkey -noout -out client.key +openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256 +openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2" +openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 6027 -sha256 +cp client.crt clientwithkey.crt +cp client.crt clientwithencryptedkey.crt +cat client.key >> clientwithkey.crt +cat clientencrypted.key >> clientwithencryptedkey.crt +``` \ No newline at end of file diff --git a/test/testdata/cookies/httponly_cookies.txt b/test/testdata/cookies/httponly_cookies.txt new file mode 100644 index 0000000..c46541d --- /dev/null +++ b/test/testdata/cookies/httponly_cookies.txt @@ -0,0 +1,6 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE +www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt new file mode 100644 index 0000000..17bc403 --- /dev/null +++ b/test/testdata/cookies/malformed_cookies.txt @@ -0,0 +1,9 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +# Cookie file entry with invalid number of fields - 6 instead of 7 +www.foobar.foobar FALSE / FALSE 0 COOKIE + +# Cookie file entry with invalid expires at +www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE diff --git a/test/testdata/cookies/session_cookies.txt b/test/testdata/cookies/session_cookies.txt new file mode 100644 index 0000000..f6996f0 --- /dev/null +++ b/test/testdata/cookies/session_cookies.txt @@ -0,0 +1,6 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. 
+ +www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue +www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value diff --git a/test/testdata/f4m/custom_base_url.f4m b/test/testdata/f4m/custom_base_url.f4m new file mode 100644 index 0000000..74e1539 --- /dev/null +++ b/test/testdata/f4m/custom_base_url.f4m @@ -0,0 +1,10 @@ +<?xml version="1.0" encoding="UTF-8"?> +<manifest xmlns="http://ns.adobe.com/f4m/1.0"> + <streamType>recorded</streamType> + <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL> + <duration>269.293</duration> + <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo> + <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2"> + <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata> + </media> +</manifest> diff --git a/test/testdata/ism/ec-3_test.Manifest b/test/testdata/ism/ec-3_test.Manifest new file mode 100644 index 0000000..45f95de --- /dev/null +++ b/test/testdata/ism/ec-3_test.Manifest @@ -0,0 +1 @@ +<?xml version="1.0" encoding="utf-8"?><!--Transformed by VSMT using XSL stylesheet for rule Identity--><!-- Created with Unified Streaming Platform (version=1.10.12-18737) --><SmoothStreamingMedia MajorVersion="2" MinorVersion="0" TimeScale="10000000" Duration="370000000"><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="127802" CodecPrivateData="1190" SamplingRate="48000" Channels="2" BitsPerSample="16" PacketSize="4" AudioTag="255" FourCC="AACL" /><c t="0" d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="7253333" /></StreamIndex><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu_1" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu_1={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="224000" CodecPrivateData="00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00" FourCCData="0700200F00" SamplingRate="48000" Channels="6" BitsPerSample="16" PacketSize="896" AudioTag="65534" FourCC="EC-3" /><c t="0" d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="8320000" /></StreamIndex><StreamIndex Type="video" QualityLevels="8" TimeScale="10000000" Language="deu" Name="video_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(video_deu={start time})?noStreamProfile=1" MaxWidth="1920" 
MaxHeight="1080" DisplayWidth="1920" DisplayHeight="1080"><QualityLevel Index="0" Bitrate="23909" CodecPrivateData="000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8" MaxWidth="384" MaxHeight="216" FourCC="AVC1" /><QualityLevel Index="1" Bitrate="403188" CodecPrivateData="00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2" MaxWidth="400" MaxHeight="224" FourCC="AVC1" /><QualityLevel Index="2" Bitrate="680365" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="3" Bitrate="1253465" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="4" Bitrate="2121558" CodecPrivateData="00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80" MaxWidth="768" MaxHeight="432" FourCC="AVC1" /><QualityLevel Index="5" Bitrate="3275545" CodecPrivateData="00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80" MaxWidth="1280" MaxHeight="720" FourCC="AVC1" /><QualityLevel Index="6" Bitrate="5300196" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><QualityLevel Index="7" Bitrate="8079312" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><c t="0" d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="10000000" /></StreamIndex></SmoothStreamingMedia> \ No newline at end of file diff --git a/test/testdata/ism/sintel.Manifest b/test/testdata/ism/sintel.Manifest new file mode 100644 index 0000000..2ff8c24 --- /dev/null +++ b/test/testdata/ism/sintel.Manifest @@ -0,0 +1,988 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- Created with Unified Streaming Platform (version=1.10.18-20255) --> +<SmoothStreamingMedia + MajorVersion="2" + MinorVersion="0" + TimeScale="10000000" + Duration="8880746666"> + <StreamIndex + Type="audio" + QualityLevels="1" + TimeScale="10000000" + Name="audio" + Chunks="445" + Url="QualityLevels({bitrate})/Fragments(audio={start time})"> + <QualityLevel + Index="0" + Bitrate="128001" + CodecPrivateData="1190" + SamplingRate="48000" + Channels="2" + BitsPerSample="16" + PacketSize="4" + AudioTag="255" + FourCC="AACL" /> + <c t="0" d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> 
+ <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c 
d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c 
d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="20053333" /> + <c d="20053333" /> + <c d="20053334" /> + <c d="19840000" /> + <c d="746666" /> + </StreamIndex> + <StreamIndex + Type="text" + QualityLevels="1" + TimeScale="10000000" + Language="eng" + Subtype="CAPT" + Name="textstream_eng" + Chunks="11" + Url="QualityLevels({bitrate})/Fragments(textstream_eng={start time})"> + <QualityLevel + Index="0" + Bitrate="1000" + CodecPrivateData="" + FourCC="TTML" /> + <c t="0" d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="600000000" /> + <c d="240000000" /> + </StreamIndex> + <StreamIndex + Type="video" + QualityLevels="5" + TimeScale="10000000" + Name="video" + Chunks="444" + Url="QualityLevels({bitrate})/Fragments(video={start time})" + MaxWidth="1688" + MaxHeight="720" + DisplayWidth="1689" + DisplayHeight="720"> + <QualityLevel + Index="0" + Bitrate="100000" + CodecPrivateData="00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8" + MaxWidth="336" + MaxHeight="144" + FourCC="AVC1" /> + <QualityLevel + Index="1" + Bitrate="326000" + CodecPrivateData="00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8" + MaxWidth="562" + MaxHeight="240" + FourCC="AVC1" /> + <QualityLevel + Index="2" + Bitrate="698000" + CodecPrivateData="00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8" + MaxWidth="844" + MaxHeight="360" + FourCC="AVC1" /> + <QualityLevel + Index="3" + Bitrate="1493000" + CodecPrivateData="00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8" + MaxWidth="1126" + MaxHeight="480" + FourCC="AVC1" /> + <QualityLevel + Index="4" + Bitrate="4482000" + CodecPrivateData="00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8" + MaxWidth="1688" + MaxHeight="720" + FourCC="AVC1" /> + <c t="0" d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + 
<c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c 
d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c 
d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + <c d="20000000" /> + </StreamIndex> +</SmoothStreamingMedia> diff --git a/test/testdata/m3u8/bipbop_16x9.m3u8 b/test/testdata/m3u8/bipbop_16x9.m3u8 new file mode 100644 index 0000000..1ce87dd --- /dev/null +++ b/test/testdata/m3u8/bipbop_16x9.m3u8 @@ -0,0 +1,38 @@ +#EXTM3U + +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8" + + +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8" 
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8" + + +#EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs" +gear1/prog_index.m3u8 +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8" + +#EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs" +gear2/prog_index.m3u8 +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8" + +#EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs" +gear3/prog_index.m3u8 +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8" + +#EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs" +gear4/prog_index.m3u8 +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8" + +#EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs" +gear5/prog_index.m3u8 +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8" + +#EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs" +gear0/prog_index.m3u8 diff --git a/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 b/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 new file mode 100644 index 0000000..620ce04 --- /dev/null +++ b/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8 @@ -0,0 +1,76 @@ +#EXTM3U +#EXT-X-VERSION:6 +#EXT-X-INDEPENDENT-SEGMENTS + + +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2168183,BANDWIDTH=2177116,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v5/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7968416,BANDWIDTH=8001098,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v9/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6170000,BANDWIDTH=6312875,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v8/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4670769,BANDWIDTH=4943747,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v7/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3168702,BANDWIDTH=3216424,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v6/prog_index.m3u8 
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1265132,BANDWIDTH=1268994,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v4/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=895755,BANDWIDTH=902298,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v3/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=530721,BANDWIDTH=541052,CODECS="avc1.640015,mp4a.40.2",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1" +v2/prog_index.m3u8 + + +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2390686,BANDWIDTH=2399619,CODECS="avc1.640020,ac-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v5/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=8190919,BANDWIDTH=8223601,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v9/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6392503,BANDWIDTH=6535378,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v8/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4893272,BANDWIDTH=5166250,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v7/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3391205,BANDWIDTH=3438927,CODECS="avc1.640020,ac-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v6/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1487635,BANDWIDTH=1491497,CODECS="avc1.64001e,ac-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v4/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1118258,BANDWIDTH=1124801,CODECS="avc1.64001e,ac-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v3/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=753224,BANDWIDTH=763555,CODECS="avc1.640015,ac-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1" +v2/prog_index.m3u8 + + +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2198686,BANDWIDTH=2207619,CODECS="avc1.640020,ec-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v5/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7998919,BANDWIDTH=8031601,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v9/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6200503,BANDWIDTH=6343378,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v8/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4701272,BANDWIDTH=4974250,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v7/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3199205,BANDWIDTH=3246927,CODECS="avc1.640020,ec-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v6/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1295635,BANDWIDTH=1299497,CODECS="avc1.64001e,ec-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v4/prog_index.m3u8 
+#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=926258,BANDWIDTH=932801,CODECS="avc1.64001e,ec-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v3/prog_index.m3u8 +#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=561224,BANDWIDTH=571555,CODECS="avc1.640015,ec-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1" +v2/prog_index.m3u8 + + +#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=183689,BANDWIDTH=187492,CODECS="avc1.64002a",RESOLUTION=1920x1080,URI="v7/iframe_index.m3u8" +#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=132672,BANDWIDTH=136398,CODECS="avc1.640020",RESOLUTION=1280x720,URI="v6/iframe_index.m3u8" +#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=97767,BANDWIDTH=101378,CODECS="avc1.640020",RESOLUTION=960x540,URI="v5/iframe_index.m3u8" +#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=75722,BANDWIDTH=77818,CODECS="avc1.64001e",RESOLUTION=768x432,URI="v4/iframe_index.m3u8" +#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=63522,BANDWIDTH=65091,CODECS="avc1.64001e",RESOLUTION=640x360,URI="v3/iframe_index.m3u8" +#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=39678,BANDWIDTH=40282,CODECS="avc1.640015",RESOLUTION=480x270,URI="v2/iframe_index.m3u8" + + +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="2",URI="a1/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud2",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a2/prog_index.m3u8" +#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud3",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a3/prog_index.m3u8" + + +#EXT-X-MEDIA:TYPE=CLOSED-CAPTIONS,GROUP-ID="cc1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,INSTREAM-ID="CC1" + + +#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="sub1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,FORCED=NO,URI="s1/en/prog_index.m3u8" diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd new file mode 100644 index 0000000..8dc1d2d --- /dev/null +++ b/test/testdata/mpd/float_duration.mpd @@ -0,0 +1,18 @@ +<?xml version="1.0" encoding="UTF-8"?> +<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S"> + <Period bitstreamSwitching="true"> + <AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true"> + <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate> + <Representation id="318597" bandwidth="61587"></Representation> + </AdaptationSet> + <AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true"> + <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate> + <Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation> + <Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation> + <Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation> + <Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation> + <Representation 
id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation> + <Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation> + </AdaptationSet> + </Period> +</MPD> \ No newline at end of file diff --git a/test/testdata/mpd/subtitles.mpd b/test/testdata/mpd/subtitles.mpd new file mode 100644 index 0000000..6f948ad --- /dev/null +++ b/test/testdata/mpd/subtitles.mpd @@ -0,0 +1,351 @@ +<?xml version="1.0" encoding="utf-8"?> +<!-- Created with Unified Streaming Platform (version=1.10.18-20255) --> +<MPD + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xmlns="urn:mpeg:dash:schema:mpd:2011" + xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd" + type="static" + mediaPresentationDuration="PT14M48S" + maxSegmentDuration="PT1M" + minBufferTime="PT10S" + profiles="urn:mpeg:dash:profile:isoff-live:2011"> + <Period + id="1" + duration="PT14M48S"> + <BaseURL>dash/</BaseURL> + <AdaptationSet + id="1" + group="1" + contentType="audio" + segmentAlignment="true" + audioSamplingRate="48000" + mimeType="audio/mp4" + codecs="mp4a.40.2" + startWithSAP="1"> + <AudioChannelConfiguration + schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" + value="2" /> + <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" /> + <SegmentTemplate + timescale="48000" + initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" + media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> + <SegmentTimeline> + <S t="0" d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S 
d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="96256" r="2" /> + <S d="95232" /> + <S d="3584" /> + </SegmentTimeline> + </SegmentTemplate> + <Representation + id="audio=128001" + bandwidth="128001"> + </Representation> + </AdaptationSet> + <AdaptationSet + id="2" + group="3" + contentType="text" + lang="en" + mimeType="application/mp4" + codecs="stpp" + startWithSAP="1"> + <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" /> + <SegmentTemplate + timescale="1000" + initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" + media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> + <SegmentTimeline> + <S t="0" d="60000" r="9" /> + <S d="24000" /> + </SegmentTimeline> + </SegmentTemplate> + <Representation + id="textstream_eng=1000" + bandwidth="1000"> + </Representation> + </AdaptationSet> + <AdaptationSet + id="3" + group="2" + contentType="video" + par="960:409" + 
minBandwidth="100000" + maxBandwidth="4482000" + maxWidth="1689" + maxHeight="720" + segmentAlignment="true" + mimeType="video/mp4" + codecs="avc1.4D401F" + startWithSAP="1"> + <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" /> + <SegmentTemplate + timescale="12288" + initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" + media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> + <SegmentTimeline> + <S t="0" d="24576" r="443" /> + </SegmentTimeline> + </SegmentTemplate> + <Representation + id="video=100000" + bandwidth="100000" + width="336" + height="144" + sar="2880:2863" + scanType="progressive"> + </Representation> + <Representation + id="video=326000" + bandwidth="326000" + width="562" + height="240" + sar="115200:114929" + scanType="progressive"> + </Representation> + <Representation + id="video=698000" + bandwidth="698000" + width="844" + height="360" + sar="86400:86299" + scanType="progressive"> + </Representation> + <Representation + id="video=1493000" + bandwidth="1493000" + width="1126" + height="480" + sar="230400:230267" + scanType="progressive"> + </Representation> + <Representation + id="video=4482000" + bandwidth="4482000" + width="1688" + height="720" + sar="86400:86299" + scanType="progressive"> + </Representation> + </AdaptationSet> + </Period> +</MPD> diff --git a/test/testdata/mpd/unfragmented.mpd b/test/testdata/mpd/unfragmented.mpd new file mode 100644 index 0000000..5a3720b --- /dev/null +++ b/test/testdata/mpd/unfragmented.mpd @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?> +<MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011"> + <Period duration="PT54.915S"> + <AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1"> + <Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360"> + <BaseURL>DASH_360</BaseURL> + <SegmentBase indexRange="915-1114" indexRangeExact="true"> + <Initialization range="0-914"/> + </SegmentBase> + </Representation> + <Representation bandwidth="608000" codecs="avc1.4d401e" frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240"> + <BaseURL>DASH_240</BaseURL> + <SegmentBase indexRange="913-1112" indexRangeExact="true"> + <Initialization range="0-912"/> + </SegmentBase> + </Representation> + </AdaptationSet> + <AdaptationSet> + <Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1"> + <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/> + <BaseURL>audio</BaseURL> + <SegmentBase indexRange="832-1007" indexRangeExact="true"> + <Initialization range="0-831"/> + </SegmentBase> + </Representation> + </AdaptationSet> + </Period> +</MPD> diff --git a/test/testdata/mpd/urls_only.mpd b/test/testdata/mpd/urls_only.mpd new file mode 100644 index 0000000..2b9d595 --- /dev/null +++ b/test/testdata/mpd/urls_only.mpd @@ -0,0 +1,218 @@ +<?xml version="1.0" ?> +<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011"> + <Period duration="PT0H4M1.728S"> + <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" 
segmentAlignment="true"> + <ContentComponent contentType="video" id="1"/> + <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/> + <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/> + <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/> + <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/> + <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" 
id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920"> + <SegmentList duration="10000" timescale="1000"> + <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/> + <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/> + </SegmentList> + </Representation> + </AdaptationSet> + </Period> +</MPD> diff --git a/test/testdata/thumbnails/foo %d bar/foo_%d.webp b/test/testdata/thumbnails/foo %d bar/foo_%d.webp new file mode 100644 index 0000000..d64d083 Binary files /dev/null and b/test/testdata/thumbnails/foo %d bar/foo_%d.webp differ diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf new file mode 100644 index 0000000..b7f0086 --- /dev/null +++ b/test/testdata/xspf/foo_xspf.xspf @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="UTF-8"?> +<playlist version="1" xmlns="http://xspf.org/ns/0/"> + <date>2018-03-09T18:01:43Z</date> + <trackList> + <track> + <location>cd1/track%201.mp3</location> + <title>Pandemonium + Foilverb + Visit http://bigbrother404.bandcamp.com + Pandemonium 
EP + 1 + 202416 + + + ../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3 + Final Cartridge (Nichico Twelve Remix) + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 2 + 255857 + + + track3.mp3 + https://example.com/track3.mp3 + Rebuilding Nightingale + Visit http://bigbrother404.bandcamp.com + Foilverb + Pandemonium EP + 3 + 287915 + + + diff --git a/test/testdata/yt_dlp_plugins/extractor/_ignore.py b/test/testdata/yt_dlp_plugins/extractor/_ignore.py new file mode 100644 index 0000000..57faf75 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/_ignore.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/extractor/ignore.py b/test/testdata/yt_dlp_plugins/extractor/ignore.py new file mode 100644 index 0000000..816a16a --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/ignore.py @@ -0,0 +1,12 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnoreNotInAllPluginIE(InfoExtractor): + pass + + +class InAllPluginIE(InfoExtractor): + pass + + +__all__ = ['InAllPluginIE'] diff --git a/test/testdata/yt_dlp_plugins/extractor/normal.py b/test/testdata/yt_dlp_plugins/extractor/normal.py new file mode 100644 index 0000000..b09009b --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/normal.py @@ -0,0 +1,9 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class NormalPluginIE(InfoExtractor): + pass + + +class _IgnoreUnderscorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/postprocessor/normal.py b/test/testdata/yt_dlp_plugins/postprocessor/normal.py new file mode 100644 index 0000000..315b85a --- /dev/null +++ b/test/testdata/yt_dlp_plugins/postprocessor/normal.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class NormalPluginPP(PostProcessor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py new file mode 100644 index 0000000..01542e0 --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class ZippedPluginIE(InfoExtractor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py new file mode 100644 index 0000000..223822b --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class ZippedPluginPP(PostProcessor): + pass diff --git a/yt-dlp.cmd b/yt-dlp.cmd new file mode 100644 index 0000000..5537e0e --- /dev/null +++ b/yt-dlp.cmd @@ -0,0 +1 @@ +@py -Werror -Xdev "%~dp0yt_dlp\__main__.py" %* diff --git a/yt-dlp.sh b/yt-dlp.sh new file mode 100755 index 0000000..ce74df8 --- /dev/null +++ b/yt-dlp.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env sh +exec "${PYTHON:-python3}" -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py new file mode 100644 index 0000000..c34d97b --- /dev/null +++ b/yt_dlp/YoutubeDL.py @@ -0,0 +1,4339 @@ +import collections +import contextlib +import copy +import datetime +import errno +import fileinput +import http.cookiejar +import io +import itertools +import json +import locale +import operator +import os +import random +import re +import shutil +import string +import 
subprocess +import sys +import tempfile +import time +import tokenize +import traceback +import unicodedata + +from .cache import Cache +from .compat import functools, urllib # isort: split +from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req +from .cookies import LenientSimpleCookie, load_cookies +from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name +from .downloader.rtmp import rtmpdump_version +from .extractor import gen_extractor_classes, get_info_extractor +from .extractor.common import UnsupportedURLIE +from .extractor.openload import PhantomJSwrapper +from .minicurses import format_text +from .networking import HEADRequest, Request, RequestDirector +from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES +from .networking.exceptions import ( + HTTPError, + NoSupportingHandlers, + RequestError, + SSLError, + network_exceptions, +) +from .plugins import directories as plugin_directories +from .postprocessor import _PLUGIN_CLASSES as plugin_pps +from .postprocessor import ( + EmbedThumbnailPP, + FFmpegFixupDuplicateMoovPP, + FFmpegFixupDurationPP, + FFmpegFixupM3u8PP, + FFmpegFixupM4aPP, + FFmpegFixupStretchedPP, + FFmpegFixupTimestampPP, + FFmpegMergerPP, + FFmpegPostProcessor, + FFmpegVideoConvertorPP, + MoveFilesAfterDownloadPP, + get_postprocessor, +) +from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping +from .update import ( + REPOSITORY, + _get_system_deprecation, + _make_label, + current_git_head, + detect_variant, +) +from .utils import ( + DEFAULT_OUTTMPL, + IDENTITY, + LINK_TEMPLATES, + MEDIA_EXTENSIONS, + NO_DEFAULT, + NUMBER_RE, + OUTTMPL_TYPES, + POSTPROCESS_WHEN, + STR_FORMAT_RE_TMPL, + STR_FORMAT_TYPES, + ContentTooShortError, + DateRange, + DownloadCancelled, + DownloadError, + EntryNotInPlaylist, + ExistingVideoReached, + ExtractorError, + FormatSorter, + GeoRestrictedError, + ISO3166Utils, + LazyList, + MaxDownloadsReached, + Namespace, + PagedList, + PlaylistEntries, + Popen, + PostProcessingError, + ReExtractInfo, + RejectedVideoReached, + SameFileError, + UnavailableVideoError, + UserNotLive, + age_restricted, + args_to_str, + bug_reports_message, + date_from_str, + deprecation_warning, + determine_ext, + determine_protocol, + encode_compat_str, + encodeFilename, + error_to_compat_str, + escapeHTML, + expand_path, + extract_basic_auth, + filter_dict, + float_or_none, + format_bytes, + format_decimal_suffix, + format_field, + formatSeconds, + get_compatible_ext, + get_domain, + int_or_none, + iri_to_uri, + is_path_like, + join_nonempty, + locked_file, + make_archive_id, + make_dir, + number_of_digits, + orderedSet, + orderedSet_from_options, + parse_filesize, + preferredencoding, + prepend_extension, + remove_terminal_sequences, + render_table, + replace_extension, + sanitize_filename, + sanitize_path, + sanitize_url, + str_or_none, + strftime_or_none, + subtitles_filename, + supports_terminal_sequences, + system_identifier, + timetuple_from_msec, + to_high_limit_path, + traverse_obj, + try_call, + try_get, + url_basename, + variadic, + version_tuple, + windows_enable_vt_mode, + write_json_file, + write_string, +) +from .utils._utils import _YDLLogger +from .utils.networking import ( + HTTPHeaderDict, + clean_headers, + clean_proxies, + std_headers, +) +from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__ + +if compat_os_name == 'nt': + import ctypes + + +class YoutubeDL: + """YoutubeDL class. 
+ + YoutubeDL objects are the ones responsible for downloading the + actual video file and writing it to disk if the user has requested + it, among some other tasks. In most cases there should be one per + program. Since, given a video URL, the downloader doesn't know how to + extract all the needed information (a task that InfoExtractors do), it + has to pass the URL to one of them. + + For this, YoutubeDL objects have a method that allows + InfoExtractors to be registered in a given order. When it is passed + a URL, the YoutubeDL object hands it to the first InfoExtractor it + finds that reports being able to handle it. The InfoExtractor extracts + all the information about the video or videos the URL refers to, and + YoutubeDL processes the extracted information, possibly using a File + Downloader to download the video. + + YoutubeDL objects accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. These options are available through the params + attribute for the InfoExtractors to use. The YoutubeDL also + registers itself as the downloader in charge of the InfoExtractors + that are added to it, so this is a "mutual registration". + + Available options: + + username: Username for authentication purposes. + password: Password for authentication purposes. + videopassword: Password for accessing a video. + ap_mso: Adobe Pass multiple-system operator identifier. + ap_username: Multiple-system operator account username. + ap_password: Multiple-system operator account password. + usenetrc: Use netrc for authentication instead. + netrc_location: Location of the netrc file. Defaults to ~/.netrc. + netrc_cmd: Use a shell command to get credentials + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + no_warnings: Do not print out anything for warnings. + forceprint: A dict with keys WHEN mapped to a list of templates to + print to stdout. The allowed keys are video or any of the + items in utils.POSTPROCESS_WHEN. + For compatibility, a single list is also accepted + print_to_file: A dict with keys WHEN (same as forceprint) mapped to + a list of tuples with (template, filename) + forcejson: Force printing info_dict as JSON. + dump_single_json: Force printing the info_dict of the whole playlist + (or video) as a single JSON line. + force_write_download_archive: Force writing download archive regardless + of 'skip_download' or 'simulate'. + simulate: Do not download the video files. If unset (or None), + simulate only if listsubtitles, listformats or list_thumbnails is used + format: Video format code. See "FORMAT SELECTION" for more details. + You can also pass a function. The function takes 'ctx' as + argument and returns the formats to download. + See "build_format_selector" for an implementation + allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. + ignore_no_formats_error: Ignore "No video formats" error. Useful for + extracting metadata even if the video is not actually + available for download (experimental) + format_sort: A list of fields by which to sort the video formats. + See "Sorting Formats" for more details. + format_sort_force: Force the given format_sort. See "Sorting Formats" + for more details. + prefer_free_formats: Whether to prefer video formats with free containers + over non-free ones of same quality.
+ allow_multiple_video_streams: Allow multiple video streams to be merged + into a single file + allow_multiple_audio_streams: Allow multiple audio streams to be merged + into a single file + check_formats: Whether to test if the formats are downloadable. + Can be True (check all), False (check none), + 'selected' (check selected formats), + or None (check only if requested by extractor) + paths: Dictionary of output paths. The allowed keys are 'home', + 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py) + outtmpl: Dictionary of templates for output names. Allowed keys + are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py). + For compatibility with youtube-dl, a single string can also be used + outtmpl_na_placeholder: Placeholder for unavailable meta fields. + restrictfilenames: Do not allow "&" and spaces in file names + trim_file_name: Limit length of filename (extension excluded) + windowsfilenames: Force the filenames to be Windows-compatible + ignoreerrors: Do not stop on download/postprocessing errors. + Can be 'only_download' to ignore only download errors. + Default is 'only_download' for CLI, but False for API + skip_playlist_after_errors: Number of allowed failures until the rest of + the playlist is skipped + allowed_extractors: List of regexes to match against extractor names that are allowed + overwrites: Overwrite all video and metadata files if True, + overwrite only non-video files if None + and don't overwrite any file if False + playlist_items: Specific indices of playlist to download. + playlistrandom: Download playlist items in random order. + lazy_playlist: Process playlist entries as they are received. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logger: Log messages to a logging.Logger instance. + logtostderr: Print everything to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + writedescription: Write the video description to a .description file + writeinfojson: Write the video metadata to a .info.json file + clean_infojson: Remove internal metadata from the infojson + getcomments: Extract video comments. These will not be written to disk + unless writeinfojson is also given + writeannotations: Write the video annotations to a .annotations.xml file + writethumbnail: Write the thumbnail image to a file + allow_playlist_files: Whether to write playlists' description, infojson etc. + also to disk when using the 'write*' options + write_all_thumbnails: Write all thumbnail formats to files + writelink: Write an internet shortcut file, depending on the + current platform (.url/.webloc/.desktop) + writeurllink: Write a Windows internet shortcut file (.url) + writewebloclink: Write a macOS internet shortcut file (.webloc) + writedesktoplink: Write a Linux internet shortcut file (.desktop) + writesubtitles: Write the video subtitles to a file + writeautomaticsub: Write the automatically generated subtitles to a file + listsubtitles: Lists all available subtitles for the video + subtitlesformat: The format code for subtitles + subtitleslangs: List of languages of the subtitles to download (can be regex). + The list may contain "all" to refer to all the available + subtitles. The language can be prefixed with a "-" to + exclude it from the requested languages, e.g. ['all', '-live_chat'] + keepvideo: Keep the video file after post-processing + daterange: A utils.DateRange object; download only if the upload_date is in the range.
+ skip_download: Skip the actual download of the video file + cachedir: Location of the cache files in the filesystem. + False to disable filesystem cache. + noplaylist: Download single video instead of a playlist if in doubt. + age_limit: An integer representing the user's age in years. + Videos unsuitable for the given age are skipped. + min_views: An integer representing the minimum view count the video + must have in order not to be skipped. + Videos without view count information are always + downloaded. None for no limit. + max_views: An integer representing the maximum view count. + Videos that are more popular than that are not + downloaded. + Videos without view count information are always + downloaded. None for no limit. + download_archive: A set, or the name of a file where all downloads are recorded. + Videos already present in the file are not downloaded again. + break_on_existing: Stop the download process after attempting to download a + file that is in the archive. + break_per_url: Whether break_on_reject and break_on_existing + should act on each input URL rather than on the entire queue + cookiefile: File name or text stream from which cookies should be read + and to which they should be dumped + cookiesfrombrowser: A tuple containing the name of the browser, the profile + name/path from where cookies are loaded, the name of the keyring, + and the container name, e.g. ('chrome', ) or + ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta') + legacyserverconnect: Explicitly allow HTTPS connection to servers that do not + support RFC 5746 secure renegotiation + nocheckcertificate: Do not verify SSL certificates + client_certificate: Path to client certificate file in PEM format. May include the private key + client_certificate_key: Path to private key file for client certificate + client_certificate_password: Password for client certificate private key, if encrypted. + If not provided and the key is encrypted, yt-dlp will ask interactively + prefer_insecure: Use HTTP instead of HTTPS to retrieve information. + (Only supported by some extractors) + enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons. + http_headers: A dictionary of custom headers to be used for all requests + proxy: URL of the proxy server to use + geo_verification_proxy: URL of the proxy to use for IP address verification + on geo-restricted sites. + socket_timeout: Time to wait for unresponsive hosts, in seconds + bidi_workaround: Work around buggy terminals without bidirectional text + support, using fribidi + debug_printtraffic: Print out sent and received HTTP traffic + default_search: Prepend this string if an input URL is not valid. + 'auto' for elaborate guessing + encoding: Use this encoding instead of the system-specified one. + extract_flat: Whether to resolve and process url_results further + * False: Always process. Default for API + * True: Never process + * 'in_playlist': Do not process inside playlist/multi_video + * 'discard': Always process, but don't return the result + from inside playlist/multi_video + * 'discard_in_playlist': Same as "discard", but only for + playlists (not multi_video). Default for CLI + wait_for_video: If given, wait for scheduled streams to become available. + The value should be a tuple containing the range + (min_secs, max_secs) to wait between retries + postprocessors: A list of dictionaries, each with an entry + * key: The name of the postprocessor. See + yt_dlp/postprocessor/__init__.py for a list.
+ * when: When to run the postprocessor. Allowed values are + the entries of utils.POSTPROCESS_WHEN + Assumed to be 'post_process' if not given + progress_hooks: A list of functions that get called on download + progress, with a dictionary with the entries + * status: One of "downloading", "error", or "finished". + Check this first and ignore unknown values. + * info_dict: The extracted info_dict + + If status is one of "downloading" or "finished", the + following properties may also be present: + * filename: The final filename (always present) + * tmpfilename: The filename we're currently writing to + * downloaded_bytes: Bytes on disk + * total_bytes: Size of the whole file, None if unknown + * total_bytes_estimate: Guess of the eventual file size, + None if unavailable. + * elapsed: The number of seconds since download started. + * eta: The estimated time in seconds, None if unknown + * speed: The download speed in bytes/second, None if + unknown + * fragment_index: The counter of the currently + downloaded video fragment. + * fragment_count: The number of fragments (= individual + files that will be merged) + + Progress hooks are guaranteed to be called at least once + (with status "finished") if the download is successful. + postprocessor_hooks: A list of functions that get called on postprocessing + progress, with a dictionary with the entries + * status: One of "started", "processing", or "finished". + Check this first and ignore unknown values. + * postprocessor: Name of the postprocessor + * info_dict: The extracted info_dict + + Progress hooks are guaranteed to be called at least twice + (with status "started" and "finished") if the processing is successful. + merge_output_format: "/" separated list of extensions to use when merging formats. + final_ext: Expected final extension; used to detect when the file was + already downloaded and converted + fixup: Automatically correct known faults of the file. + One of: + - "never": do nothing + - "warn": only emit a warning + - "detect_or_warn": check whether we can do anything + about it, warn otherwise (default) + source_address: Client-side IP address to bind to. + sleep_interval_requests: Number of seconds to sleep between requests + during extraction + sleep_interval: Number of seconds to sleep before each download when + used alone, or the lower bound of a range for randomized + sleep before each download (minimum possible number + of seconds to sleep) when used along with + max_sleep_interval. + max_sleep_interval: Upper bound of a range for randomized sleep before each + download (maximum possible number of seconds to sleep). + Must only be used along with sleep_interval. + Actual sleep time will be a random float from range + [sleep_interval; max_sleep_interval]. + sleep_interval_subtitles: Number of seconds to sleep before each subtitle download + listformats: Print an overview of available video formats and exit. + list_thumbnails: Print a table of all thumbnails and exit. + match_filter: A function that gets called for every video with the signature + (info_dict, *, incomplete: bool) -> Optional[str] + For backward compatibility with youtube-dl, the signature + (info_dict) -> Optional[str] is also allowed. + - If it returns a message, the video is ignored. + - If it returns None, the video is downloaded. + - If it returns utils.NO_DEFAULT, the user is interactively + asked whether to download the video. + - Raise utils.DownloadCancelled(msg) to abort remaining + downloads when a video is rejected.
+ match_filter_func in utils/_utils.py is one example of this. + color: A dictionary with output stream names as keys + and their respective color policy as values. + Can also just be a single color policy, + in which case it applies to all outputs. + Valid stream names are 'stdout' and 'stderr'. + Valid color policies are one of 'always', 'auto', 'no_color' or 'never'. + geo_bypass: Bypass geographic restriction by faking X-Forwarded-For + HTTP header + geo_bypass_country: + Two-letter ISO 3166-2 country code that will be used for + explicit geographic restriction bypassing by faking + X-Forwarded-For HTTP header + geo_bypass_ip_block: + IP range in CIDR notation that will be used similarly to + geo_bypass_country + external_downloader: A dictionary of protocol keys and the executable of the + external downloader to use for it. The allowed protocols + are default|http|ftp|m3u8|dash|rtsp|rtmp|mms. + Set the value to 'native' to use the native downloader + compat_opts: Compatibility options. See "Differences in default behavior". + The following options do not work when used through the API: + filename, abort-on-error, multistreams, no-live-chat, format-sort + no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. + Refer to __init__.py for their implementation + progress_template: Dictionary of templates for progress outputs. + Allowed keys are 'download', 'postprocess', + 'download-title' (console title) and 'postprocess-title'. + The template is mapped on a dictionary with keys 'progress' and 'info' + retry_sleep_functions: Dictionary of functions that take the number of attempts + as argument and return the time to sleep in seconds. + Allowed keys are 'http', 'fragment', 'file_access' + download_ranges: A callback function that gets called for every video with + the signature (info_dict, ydl) -> Iterable[Section]. + Only the returned sections will be downloaded. + Each Section is a dict with the following keys: + * start_time: Start time of the section in seconds + * end_time: End time of the section in seconds + * title: Section title (Optional) + * index: Section number (Optional) + force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts + noprogress: Do not print the progress bar + live_from_start: Whether to download livestream videos from the start + + The following parameters are not used by YoutubeDL itself; they are used by + the downloader (see yt_dlp/downloader/common.py): + nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, + max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries, + continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size, + external_downloader_args, concurrent_fragment_downloads. + + The following options are used by the post processors: + ffmpeg_location: Location of the ffmpeg/avconv binary; either the path + to the binary or its containing directory. + postprocessor_args: A dictionary of postprocessor/executable keys (in lower case) + and a list of additional command-line arguments for the + postprocessor/executable. The dict can also have "PP+EXE" keys + which are used when the given exe is used by the given PP.
+ Use 'default' as the name for arguments to be passed to all PPs + For compatibility with youtube-dl, a single list of args + can also be used + + The following options are used by the extractors: + extractor_retries: Number of times to retry for known errors (default: 3) + dynamic_mpd: Whether to process dynamic DASH manifests (default: True) + hls_split_discontinuity: Split HLS playlists into different formats at + discontinuities such as ad breaks (default: False) + extractor_args: A dictionary of arguments to be passed to the extractors. + See "EXTRACTOR ARGUMENTS" for details. + E.g. {'youtube': {'skip': ['dash', 'hls']}} + mark_watched: Mark videos watched (even with --simulate). Only for YouTube + + The following options are deprecated and may be removed in the future: + + break_on_reject: Stop the download process when encountering a video that + has been filtered out. + - `raise DownloadCancelled(msg)` in match_filter instead + force_generic_extractor: Force downloader to use the generic extractor + - Use allowed_extractors = ['generic', 'default'] + playliststart: - Use playlist_items + Playlist item to start at. + playlistend: - Use playlist_items + Playlist item to end at. + playlistreverse: - Use playlist_items + Download playlist items in reverse order. + forceurl: - Use forceprint + Force printing final URL. + forcetitle: - Use forceprint + Force printing title. + forceid: - Use forceprint + Force printing ID. + forcethumbnail: - Use forceprint + Force printing thumbnail URL. + forcedescription: - Use forceprint + Force printing description. + forcefilename: - Use forceprint + Force printing final filename. + forceduration: - Use forceprint + Force printing duration. + allsubtitles: - Use subtitleslangs = ['all'] + Downloads all the subtitles of the video + (requires writesubtitles or writeautomaticsub) + include_ads: - Doesn't work + Download ads as well + call_home: - Not implemented + Boolean, true iff we are allowed to contact the + yt-dlp servers for debugging. + post_hooks: - Register a custom postprocessor + A list of functions that get called as the final step + for each video file, after all postprocessors have been + called. The filename will be passed as the only argument. + hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}. + Use the native HLS downloader instead of ffmpeg/avconv + if True; use ffmpeg/avconv if False; use the downloader + suggested by the extractor if None. + prefer_ffmpeg: - avconv support is deprecated + If False, use avconv instead of ffmpeg if both are available, + otherwise prefer ffmpeg. + youtube_include_dash_manifest: - Use extractor_args + If True (default), DASH manifests and related + data will be downloaded and processed by the extractor. + You can reduce network I/O by disabling it if you don't + care about DASH. (only for youtube) + youtube_include_hls_manifest: - Use extractor_args + If True (default), HLS manifests and related + data will be downloaded and processed by the extractor. + You can reduce network I/O by disabling it if you don't + care about HLS.
(only for youtube) + no_color: Same as `color='no_color'` + no_overwrites: Same as `overwrites=False` + """ + + _NUMERIC_FIELDS = { + 'width', 'height', 'asr', 'audio_channels', 'fps', + 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx', + 'timestamp', 'release_timestamp', + 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', + 'average_rating', 'comment_count', 'age_limit', + 'start_time', 'end_time', + 'chapter_number', 'season_number', 'episode_number', + 'track_number', 'disc_number', 'release_year', + } + + _format_fields = { + # NB: Keep in sync with the docstring of extractor/common.py + 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', + 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', + 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', + 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', + 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', + 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' + } + _deprecated_multivalue_fields = { + 'album_artist': 'album_artists', + 'artist': 'artists', + 'composer': 'composers', + 'creator': 'creators', + 'genre': 'genres', + } + _format_selection_exts = { + 'audio': set(MEDIA_EXTENSIONS.common_audio), + 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), + 'storyboards': set(MEDIA_EXTENSIONS.storyboards), + } + + def __init__(self, params=None, auto_init=True): + """Create a FileDownloader object with the given options. + @param auto_init Whether to load the default extractors and print header (if verbose). 
+ Set to 'no_verbose_header' to not print the header + """ + if params is None: + params = {} + self.params = params + self._ies = {} + self._ies_instances = {} + self._pps = {k: [] for k in POSTPROCESS_WHEN} + self._printed_messages = set() + self._first_webpage_request = True + self._post_hooks = [] + self._progress_hooks = [] + self._postprocessor_hooks = [] + self._download_retcode = 0 + self._num_downloads = 0 + self._num_videos = 0 + self._playlist_level = 0 + self._playlist_urls = set() + self.cache = Cache(self) + self.__header_cookies = [] + + stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout + self._out_files = Namespace( + out=stdout, + error=sys.stderr, + screen=sys.stderr if self.params.get('quiet') else stdout, + console=None if compat_os_name == 'nt' else next( + filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) + ) + + try: + windows_enable_vt_mode() + except Exception as e: + self.write_debug(f'Failed to enable VT mode: {e}') + + if self.params.get('no_color'): + if self.params.get('color') is not None: + self.params.setdefault('_warnings', []).append( + 'Overwriting params from "color" with "no_color"') + self.params['color'] = 'no_color' + + term_allow_color = os.getenv('TERM', '').lower() != 'dumb' + no_color = bool(os.getenv('NO_COLOR')) + + def process_color_policy(stream): + stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream] + policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False) + if policy in ('auto', None): + if term_allow_color and supports_terminal_sequences(stream): + return 'no_color' if no_color else True + return False + assert policy in ('always', 'never', 'no_color'), policy + return {'always': True, 'never': False}.get(policy, policy) + + self._allow_colors = Namespace(**{ + name: process_color_policy(stream) + for name, stream in self._out_files.items_ if name != 'console' + }) + + system_deprecation = _get_system_deprecation() + if system_deprecation: + self.deprecated_feature(system_deprecation.replace('\n', '\n ')) + + if self.params.get('allow_unplayable_formats'): + self.report_warning( + f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' + 'This is a developer option intended for debugging. \n' + ' If you experience any issues while using this option, ' + f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') + + if self.params.get('bidi_workaround', False): + try: + import pty + master, slave = pty.openpty() + width = shutil.get_terminal_size().columns + width_args = [] if width is None else ['-w', str(width)] + sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} + try: + self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + except OSError: + self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_channel = os.fdopen(master, 'rb') + except OSError as ose: + if ose.errno == errno.ENOENT: + self.report_warning( + 'Could not find fribidi executable, ignoring --bidi-workaround. 
' + 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') + else: + raise + + self.params['compat_opts'] = set(self.params.get('compat_opts', ())) + self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers')) + self._load_cookies(self.params['http_headers'].get('Cookie')) # compat + self.params['http_headers'].pop('Cookie', None) + + if auto_init and auto_init != 'no_verbose_header': + self.print_debug_header() + + def check_deprecated(param, option, suggestion): + if self.params.get(param) is not None: + self.report_warning(f'{option} is deprecated. Use {suggestion} instead') + return True + return False + + if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): + if self.params.get('geo_verification_proxy') is None: + self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] + + check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') + check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') + check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') + + for msg in self.params.get('_warnings', []): + self.report_warning(msg) + for msg in self.params.get('_deprecation_warnings', []): + self.deprecated_feature(msg) + + if 'list-formats' in self.params['compat_opts']: + self.params['listformats_table'] = False + + if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: + # nooverwrites was unnecessarily changed to overwrites + # in 0c3d0f51778b153f65c21906031c2e091fcfb641 + # This ensures compatibility with both keys + self.params['overwrites'] = not self.params['nooverwrites'] + elif self.params.get('overwrites') is None: + self.params.pop('overwrites', None) + else: + self.params['nooverwrites'] = not self.params['overwrites'] + + if self.params.get('simulate') is None and any(( + self.params.get('list_thumbnails'), + self.params.get('listformats'), + self.params.get('listsubtitles'), + )): + self.params['simulate'] = 'list_only' + + self.params.setdefault('forceprint', {}) + self.params.setdefault('print_to_file', {}) + + # Compatibility with older syntax + if not isinstance(params['forceprint'], dict): + self.params['forceprint'] = {'video': params['forceprint']} + + if auto_init: + self.add_default_info_extractors() + + if (sys.platform != 'win32' + and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not self.params.get('restrictfilenames', False)): + # Unicode filesystem API will throw errors (#1474, #13027) + self.report_warning( + 'Assuming --restrict-filenames since file system encoding ' + 'cannot encode all characters. 
' + 'Set the LC_ALL environment variable to fix this.') + self.params['restrictfilenames'] = True + + self._parse_outtmpl() + + # Creating format selector here allows us to catch syntax errors before the extraction + self.format_selector = ( + self.params.get('format') if self.params.get('format') in (None, '-') + else self.params['format'] if callable(self.params['format']) + else self.build_format_selector(self.params['format'])) + + hooks = { + 'post_hooks': self.add_post_hook, + 'progress_hooks': self.add_progress_hook, + 'postprocessor_hooks': self.add_postprocessor_hook, + } + for opt, fn in hooks.items(): + for ph in self.params.get(opt, []): + fn(ph) + + for pp_def_raw in self.params.get('postprocessors', []): + pp_def = dict(pp_def_raw) + when = pp_def.pop('when', 'post_process') + self.add_post_processor( + get_postprocessor(pp_def.pop('key'))(self, **pp_def), + when=when) + + def preload_download_archive(fn): + """Preload the archive, if any is specified""" + archive = set() + if fn is None: + return archive + elif not is_path_like(fn): + return fn + + self.write_debug(f'Loading archive file {fn!r}') + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + archive.add(line.strip()) + except OSError as ioe: + if ioe.errno != errno.ENOENT: + raise + return archive + + self.archive = preload_download_archive(self.params.get('download_archive')) + + def warn_if_short_id(self, argv): + # short YouTube ID starting with dash? + idxs = [ + i for i, a in enumerate(argv) + if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] + if idxs: + correct_argv = ( + ['yt-dlp'] + + [a for i, a in enumerate(argv) if i not in idxs] + + ['--'] + [argv[i] for i in idxs] + ) + self.report_warning( + 'Long argument string detected. ' + 'Use -- to separate parameters and URLs, like this:\n%s' % + args_to_str(correct_argv)) + + def add_info_extractor(self, ie): + """Add an InfoExtractor object to the end of the list.""" + ie_key = ie.ie_key() + self._ies[ie_key] = ie + if not isinstance(ie, type): + self._ies_instances[ie_key] = ie + ie.set_downloader(self) + + def get_info_extractor(self, ie_key): + """ + Get an instance of an IE with name ie_key, it will try to get one from + the _ies list, if there's no instance it will create a new one and add + it to the extractor list. 
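
# A minimal usage sketch (illustrative, assuming the public YoutubeDL API):
# extractor instances are created lazily and cached in _ies_instances, so
# repeated lookups with the same key return the same object.
from yt_dlp import YoutubeDL

ydl = YoutubeDL({'quiet': True})
ie = ydl.get_info_extractor('Youtube')          # instantiated on first request
assert ie is ydl.get_info_extractor('Youtube')  # served from the cache afterwards
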
+ """ + ie = self._ies_instances.get(ie_key) + if ie is None: + ie = get_info_extractor(ie_key)() + self.add_info_extractor(ie) + return ie + + def add_default_info_extractors(self): + """ + Add the InfoExtractors returned by gen_extractors to the end of the list + """ + all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()} + all_ies['end'] = UnsupportedURLIE() + try: + ie_names = orderedSet_from_options( + self.params.get('allowed_extractors', ['default']), { + 'all': list(all_ies), + 'default': [name for name, ie in all_ies.items() if ie._ENABLED], + }, use_regex=True) + except re.error as e: + raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}') + for name in ie_names: + self.add_info_extractor(all_ies[name]) + self.write_debug(f'Loaded {len(ie_names)} extractors') + + def add_post_processor(self, pp, when='post_process'): + """Add a PostProcessor object to the end of the chain.""" + assert when in POSTPROCESS_WHEN, f'Invalid when={when}' + self._pps[when].append(pp) + pp.set_downloader(self) + + def add_post_hook(self, ph): + """Add the post hook""" + self._post_hooks.append(ph) + + def add_progress_hook(self, ph): + """Add the download progress hook""" + self._progress_hooks.append(ph) + + def add_postprocessor_hook(self, ph): + """Add the postprocessing progress hook""" + self._postprocessor_hooks.append(ph) + for pps in self._pps.values(): + for pp in pps: + pp.add_progress_hook(ph) + + def _bidi_workaround(self, message): + if not hasattr(self, '_output_channel'): + return message + + assert hasattr(self, '_output_process') + assert isinstance(message, str) + line_count = message.count('\n') + 1 + self._output_process.stdin.write((message + '\n').encode()) + self._output_process.stdin.flush() + res = ''.join(self._output_channel.readline().decode() + for _ in range(line_count)) + return res[:-len('\n')] + + def _write_string(self, message, out=None, only_once=False): + if only_once: + if message in self._printed_messages: + return + self._printed_messages.add(message) + write_string(message, out=out, encoding=self.params.get('encoding')) + + def to_stdout(self, message, skip_eol=False, quiet=None): + """Print message to stdout""" + if quiet is not None: + self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. ' + 'Use "YoutubeDL.to_screen" instead') + if skip_eol is not False: + self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. 
' + 'Use "YoutubeDL.to_screen" instead') + self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out) + + def to_screen(self, message, skip_eol=False, quiet=None, only_once=False): + """Print message to screen if not in quiet mode""" + if self.params.get('logger'): + self.params['logger'].debug(message) + return + if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): + return + self._write_string( + '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + self._out_files.screen, only_once=only_once) + + def to_stderr(self, message, only_once=False): + """Print message to stderr""" + assert isinstance(message, str) + if self.params.get('logger'): + self.params['logger'].error(message) + else: + self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) + + def _send_console_code(self, code): + if compat_os_name == 'nt' or not self._out_files.console: + return + self._write_string(code, self._out_files.console) + + def to_console_title(self, message): + if not self.params.get('consoletitle', False): + return + message = remove_terminal_sequences(message) + if compat_os_name == 'nt': + if ctypes.windll.kernel32.GetConsoleWindow(): + # c_wchar_p() might not be necessary if `message` is + # already of type unicode() + ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) + else: + self._send_console_code(f'\033]0;{message}\007') + + def save_console_title(self): + if not self.params.get('consoletitle') or self.params.get('simulate'): + return + self._send_console_code('\033[22;0t') # Save the title on stack + + def restore_console_title(self): + if not self.params.get('consoletitle') or self.params.get('simulate'): + return + self._send_console_code('\033[23;0t') # Restore the title from stack + + def __enter__(self): + self.save_console_title() + return self + + def save_cookies(self): + if self.params.get('cookiefile') is not None: + self.cookiejar.save() + + def __exit__(self, *args): + self.restore_console_title() + self.close() + + def close(self): + self.save_cookies() + if '_request_director' in self.__dict__: + self._request_director.close() + del self._request_director + + def trouble(self, message=None, tb=None, is_error=True): + """Determine action to take when a download problem appears. + + Depending on if the downloader has been configured to ignore + download errors or not, this method may throw an exception or + not when errors are found, after printing the message. 
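
# A small sketch of the context-manager protocol defined above: __enter__ saves
# the console title, and __exit__ restores it and calls close(), which in turn
# saves cookies (if a cookiefile was given) and shuts down the request director.
from yt_dlp import YoutubeDL

with YoutubeDL({'quiet': True}) as ydl:
    ydl.to_screen('working...')  # suppressed here, since quiet is set and verbose is not
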
+ + @param tb If given, is additional traceback information + @param is_error Whether to raise error according to ignorerrors + """ + if message is not None: + self.to_stderr(message) + if self.params.get('verbose'): + if tb is None: + if sys.exc_info()[0]: # if .trouble has been called from an except block + tb = '' + if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: + tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) + tb += encode_compat_str(traceback.format_exc()) + else: + tb_data = traceback.format_list(traceback.extract_stack()) + tb = ''.join(tb_data) + if tb: + self.to_stderr(tb) + if not is_error: + return + if not self.params.get('ignoreerrors'): + if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: + exc_info = sys.exc_info()[1].exc_info + else: + exc_info = sys.exc_info() + raise DownloadError(message, exc_info) + self._download_retcode = 1 + + Styles = Namespace( + HEADERS='yellow', + EMPHASIS='light blue', + FILENAME='green', + ID='green', + DELIM='blue', + ERROR='red', + BAD_FORMAT='light red', + WARNING='yellow', + SUPPRESS='light black', + ) + + def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): + text = str(text) + if test_encoding: + original_text = text + # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711 + encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii' + text = text.encode(encoding, 'ignore').decode(encoding) + if fallback is not None and text != original_text: + text = fallback + return format_text(text, f) if allow_colors is True else text if fallback is None else fallback + + def _format_out(self, *args, **kwargs): + return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs) + + def _format_screen(self, *args, **kwargs): + return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs) + + def _format_err(self, *args, **kwargs): + return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs) + + def report_warning(self, message, only_once=False): + ''' + Print the message to stderr, it will be prefixed with 'WARNING:' + If stderr is a tty file the 'WARNING:' will be colored + ''' + if self.params.get('logger') is not None: + self.params['logger'].warning(message) + else: + if self.params.get('no_warnings'): + return + self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once) + + def deprecation_warning(self, message, *, stacklevel=0): + deprecation_warning( + message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False) + + def deprecated_feature(self, message): + if self.params.get('logger') is not None: + self.params['logger'].warning(f'Deprecated Feature: {message}') + self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True) + + def report_error(self, message, *args, **kwargs): + ''' + Do the same as trouble, but prefixes the message with 'ERROR:', colored + in red if stderr is a tty file. 
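
# Illustrative: when a `logger` is passed in params, the reporting helpers above
# hand messages to it instead of writing to stderr.
import logging
from yt_dlp import YoutubeDL

logging.basicConfig(level=logging.INFO)
ydl = YoutubeDL({'logger': logging.getLogger('yt-dlp-demo')})
ydl.report_warning('routed to logger.warning(), not stderr')
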
+ ''' + self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) + + def write_debug(self, message, only_once=False): + '''Log debug message or Print message to stderr''' + if not self.params.get('verbose', False): + return + message = f'[debug] {message}' + if self.params.get('logger'): + self.params['logger'].debug(message) + else: + self.to_stderr(message, only_once) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + try: + self.to_screen('[download] %s has already been downloaded' % file_name) + except UnicodeEncodeError: + self.to_screen('[download] The file has already been downloaded') + + def report_file_delete(self, file_name): + """Report that existing file will be deleted.""" + try: + self.to_screen('Deleting existing file %s' % file_name) + except UnicodeEncodeError: + self.to_screen('Deleting existing file') + + def raise_no_formats(self, info, forced=False, *, msg=None): + has_drm = info.get('_has_drm') + ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) + msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' + if forced or not ignored: + raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], + expected=has_drm or ignored or expected) + else: + self.report_warning(msg) + + def parse_outtmpl(self): + self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version') + self._parse_outtmpl() + return self.params['outtmpl'] + + def _parse_outtmpl(self): + sanitize = IDENTITY + if self.params.get('restrictfilenames'): # Remove spaces in the default template + sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-') + + outtmpl = self.params.setdefault('outtmpl', {}) + if not isinstance(outtmpl, dict): + self.params['outtmpl'] = outtmpl = {'default': outtmpl} + outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None}) + + def get_output_path(self, dir_type='', filename=None): + paths = self.params.get('paths', {}) + assert isinstance(paths, dict), '"paths" parameter must be a dictionary' + path = os.path.join( + expand_path(paths.get('home', '').strip()), + expand_path(paths.get(dir_type, '').strip()) if dir_type else '', + filename or '') + return sanitize_path(path, force=self.params.get('windowsfilenames')) + + @staticmethod + def _outtmpl_expandpath(outtmpl): + # expand_path translates '%%' into '%' and '$$' into '$' + # correspondingly that is not what we want since we need to keep + # '%%' intact for template dict substitution step. Working around + # with boundary-alike separator hack. + sep = ''.join(random.choices(string.ascii_letters, k=32)) + outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') + + # outtmpl should be expand_path'ed before template dict substitution + # because meta fields may contain env variables we don't want to + # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and + # title "Hello $PATH", we don't want `$PATH` to be expanded. + return expand_path(outtmpl).replace(sep, '') + + @staticmethod + def escape_outtmpl(outtmpl): + ''' Escape any remaining strings like %s, %abc% etc. 
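
# A brief sketch of the template helpers that follow: validate_outtmpl() returns
# None for a well-formed template (otherwise the ValueError), and
# evaluate_outtmpl() performs the %-substitution against an info dict.
from yt_dlp import YoutubeDL

ydl = YoutubeDL({'quiet': True})
assert YoutubeDL.validate_outtmpl('%(title)s [%(id)s].%(ext)s') is None
info = {'id': 'x1', 'title': 'Demo', 'ext': 'mp4'}
print(ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info))  # Demo [x1].mp4
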
'''
+        return re.sub(
+            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
+            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
+            outtmpl)
+
+    @classmethod
+    def validate_outtmpl(cls, outtmpl):
+        ''' @return None or Exception object '''
+        outtmpl = re.sub(
+            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
+            lambda mobj: f'{mobj.group(0)[:-1]}s',
+            cls._outtmpl_expandpath(outtmpl))
+        try:
+            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
+            return None
+        except ValueError as err:
+            return err
+
+    @staticmethod
+    def _copy_infodict(info_dict):
+        info_dict = dict(info_dict)
+        info_dict.pop('__postprocessors', None)
+        info_dict.pop('__pending_error', None)
+        return info_dict
+
+    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
+        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
+        @param sanitize    Whether to sanitize the output as a filename.
+                           For backward compatibility, a function can also be passed
+        """
+
+        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
+
+        info_dict = self._copy_infodict(info_dict)
+        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
+            if info_dict.get('duration', None) is not None
+            else None)
+        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
+        info_dict['video_autonumber'] = self._num_videos
+        if info_dict.get('resolution') is None:
+            info_dict['resolution'] = self.format_resolution(info_dict, default=None)
+
+        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
+        # of %(field)s to %(field)0Nd for backward compatibility
+        field_size_compat_map = {
+            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
+            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
+            'autonumber': self.params.get('autonumber_size') or 5,
+        }
+
+        TMPL_DICT = {}
+        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
+        MATH_FUNCTIONS = {
+            '+': float.__add__,
+            '-': float.__sub__,
+            '*': float.__mul__,
+        }
+        # Field is of the form key1.key2...
+        # where keys (except first) can be string, int, slice or "{field, ...}"
+        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
+        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
+            'inner': FIELD_INNER_RE,
+            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
+        }
+        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
+        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
+        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
+            (?P<negate>-)?
+            (?P<fields>{FIELD_RE})
+            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
+            (?:>(?P<strf_format>.+?))?
+            (?P<remaining>
+                (?P<alternate>(?<!\\),[^|&)]*)?
+                (?:&(?P<replacement>.*?))?
+                (?:\|(?P<default>.*?))?
+            )$''')
+
+        def _from_user_input(field):
+            if field == ':':
+                return ...
+ elif ':' in field: + return slice(*map(int_or_none, field.split(':'))) + elif int_or_none(field) is not None: + return int(field) + return field + + def _traverse_infodict(fields): + fields = [f for x in re.split(r'\.({.+?})\.?', fields) + for f in ([x] if x.startswith('{') else x.split('.'))] + for i in (0, -1): + if fields and not fields[i]: + fields.pop(i) + + for i, f in enumerate(fields): + if not f.startswith('{'): + fields[i] = _from_user_input(f) + continue + assert f.endswith('}'), f'No closing brace for {f} in {fields}' + fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')} + + return traverse_obj(info_dict, fields, traverse_string=True) + + def get_value(mdict): + # Object traversal + value = _traverse_infodict(mdict['fields']) + # Negative + if mdict['negate']: + value = float_or_none(value) + if value is not None: + value *= -1 + # Do maths + offset_key = mdict['maths'] + if offset_key: + value = float_or_none(value) + operator = None + while offset_key: + item = re.match( + MATH_FIELD_RE if operator else MATH_OPERATORS_RE, + offset_key).group(0) + offset_key = offset_key[len(item):] + if operator is None: + operator = MATH_FUNCTIONS[item] + continue + item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) + offset = float_or_none(item) + if offset is None: + offset = float_or_none(_traverse_infodict(item)) + try: + value = operator(value, multiplier * offset) + except (TypeError, ZeroDivisionError): + return None + operator = None + # Datetime formatting + if mdict['strf_format']: + value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ',')) + + # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485 + if sanitize and value == '': + value = None + return value + + na = self.params.get('outtmpl_na_placeholder', 'NA') + + def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): + return sanitize_filename(str(value), restricted=restricted, is_id=( + bool(re.search(r'(^|[_.])id(\.|$)', key)) + if 'filename-sanitization' in self.params['compat_opts'] + else NO_DEFAULT)) + + sanitizer = sanitize if callable(sanitize) else filename_sanitizer + sanitize = bool(sanitize) + + def _dumpjson_default(obj): + if isinstance(obj, (set, LazyList)): + return list(obj) + return repr(obj) + + class _ReplacementFormatter(string.Formatter): + def get_field(self, field_name, args, kwargs): + if field_name.isdigit(): + return args[0], -1 + raise ValueError('Unsupported field') + + replacement_formatter = _ReplacementFormatter() + + def create_key(outer_mobj): + if not outer_mobj.group('has_key'): + return outer_mobj.group(0) + key = outer_mobj.group('key') + mobj = re.match(INTERNAL_FORMAT_RE, key) + value, replacement, default, last_field = None, None, na, '' + while mobj: + mobj = mobj.groupdict() + default = mobj['default'] if mobj['default'] is not None else default + value = get_value(mobj) + last_field, replacement = mobj['fields'], mobj['replacement'] + if value is None and mobj['alternate']: + mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:]) + else: + break + + if None not in (value, replacement): + try: + value = replacement_formatter.format(replacement, value) + except ValueError: + value, default = None, na + + fmt = outer_mobj.group('format') + if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int): + fmt = f'0{field_size_compat_map[last_field]:d}d' + + flags = outer_mobj.group('conversion') or '' + str_fmt = f'{fmt[:-1]}s' + if value is None: + 
value, fmt = default, 's' + elif fmt[-1] == 'l': # list + delim = '\n' if '#' in flags else ', ' + value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt + elif fmt[-1] == 'j': # json + value, fmt = json.dumps( + value, default=_dumpjson_default, + indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt + elif fmt[-1] == 'h': # html + value, fmt = escapeHTML(str(value)), str_fmt + elif fmt[-1] == 'q': # quoted + value = map(str, variadic(value) if '#' in flags else [value]) + value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt + elif fmt[-1] == 'B': # bytes + value = f'%{str_fmt}'.encode() % str(value).encode() + value, fmt = value.decode('utf-8', 'ignore'), 's' + elif fmt[-1] == 'U': # unicode normalized + value, fmt = unicodedata.normalize( + # "+" = compatibility equivalence, "#" = NFD + 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), + value), str_fmt + elif fmt[-1] == 'D': # decimal suffix + num_fmt, fmt = fmt[:-1].replace('#', ''), 's' + value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s', + factor=1024 if '#' in flags else 1000) + elif fmt[-1] == 'S': # filename sanitization + value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt + elif fmt[-1] == 'c': + if value: + value = str(value)[0] + else: + fmt = str_fmt + elif fmt[-1] not in 'rsa': # numeric + value = float_or_none(value) + if value is None: + value, fmt = default, 's' + + if sanitize: + # If value is an object, sanitize might convert it to a string + # So we convert it to repr first + if fmt[-1] == 'r': + value, fmt = repr(value), str_fmt + elif fmt[-1] == 'a': + value, fmt = ascii(value), str_fmt + if fmt[-1] in 'csra': + value = sanitizer(last_field, value) + + key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) + TMPL_DICT[key] = value + return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix')) + + return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT + + def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): + outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) + return self.escape_outtmpl(outtmpl) % info_dict + + def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None): + assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive' + if outtmpl is None: + outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default']) + try: + outtmpl = self._outtmpl_expandpath(outtmpl) + filename = self.evaluate_outtmpl(outtmpl, info_dict, True) + if not filename: + return None + + if tmpl_type in ('', 'temp'): + final_ext, ext = self.params.get('final_ext'), info_dict.get('ext') + if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'): + filename = replace_extension(filename, ext, final_ext) + elif tmpl_type: + force_ext = OUTTMPL_TYPES[tmpl_type] + if force_ext: + filename = replace_extension(filename, force_ext, info_dict.get('ext')) + + # https://github.com/blackjack4494/youtube-dlc/issues/85 + trim_file_name = self.params.get('trim_file_name', False) + if trim_file_name: + no_ext, *ext = filename.rsplit('.', 2) + filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.') + + return filename + except ValueError as err: + self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') + return None + + def prepare_filename(self, info_dict, 
dir_type='', *, outtmpl=None, warn=False): + """Generate the output filename""" + if outtmpl: + assert not dir_type, 'outtmpl and dir_type are mutually exclusive' + dir_type = None + filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl) + if not filename and dir_type not in ('', 'temp'): + return '' + + if warn: + if not self.params.get('paths'): + pass + elif filename == '-': + self.report_warning('--paths is ignored when an outputting to stdout', only_once=True) + elif os.path.isabs(filename): + self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True) + if filename == '-' or not filename: + return filename + + return self.get_output_path(dir_type, filename) + + def _match_entry(self, info_dict, incomplete=False, silent=False): + """Returns None if the file should be downloaded""" + _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video') + assert incomplete or _type == 'video', 'Only video result can be considered complete' + + video_title = info_dict.get('title', info_dict.get('id', 'entry')) + + def check_filter(): + if _type in ('playlist', 'multi_video'): + return + elif _type in ('url', 'url_transparent') and not try_call( + lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])): + return + + if 'title' in info_dict: + # This can happen when we're just evaluating the playlist + title = info_dict['title'] + matchtitle = self.params.get('matchtitle', False) + if matchtitle: + if not re.search(matchtitle, title, re.IGNORECASE): + return '"' + title + '" title did not match pattern "' + matchtitle + '"' + rejecttitle = self.params.get('rejecttitle', False) + if rejecttitle: + if re.search(rejecttitle, title, re.IGNORECASE): + return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' + + date = info_dict.get('upload_date') + if date is not None: + dateRange = self.params.get('daterange', DateRange()) + if date not in dateRange: + return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}' + view_count = info_dict.get('view_count') + if view_count is not None: + min_views = self.params.get('min_views') + if min_views is not None and view_count < min_views: + return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) + max_views = self.params.get('max_views') + if max_views is not None and view_count > max_views: + return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) + if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): + return 'Skipping "%s" because it is age restricted' % video_title + + match_filter = self.params.get('match_filter') + if match_filter is None: + return None + + cancelled = None + try: + try: + ret = match_filter(info_dict, incomplete=incomplete) + except TypeError: + # For backward compatibility + ret = None if incomplete else match_filter(info_dict) + except DownloadCancelled as err: + if err.msg is not NO_DEFAULT: + raise + ret, cancelled = err.msg, err + + if ret is NO_DEFAULT: + while True: + filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) + reply = input(self._format_screen( + f'Download "{filename}"? 
(Y/n): ', self.Styles.EMPHASIS)).lower().strip() + if reply in {'y', ''}: + return None + elif reply == 'n': + if cancelled: + raise type(cancelled)(f'Skipping {video_title}') + return f'Skipping {video_title}' + return ret + + if self.in_download_archive(info_dict): + reason = ''.join(( + format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '), + format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '), + 'has already been recorded in the archive')) + break_opt, break_err = 'break_on_existing', ExistingVideoReached + else: + try: + reason = check_filter() + except DownloadCancelled as e: + reason, break_opt, break_err = e.msg, 'match_filter', type(e) + else: + break_opt, break_err = 'break_on_reject', RejectedVideoReached + if reason is not None: + if not silent: + self.to_screen('[download] ' + reason) + if self.params.get(break_opt, False): + raise break_err() + return reason + + @staticmethod + def add_extra_info(info_dict, extra_info): + '''Set the keys from extra_info in info dict if they are missing''' + for key, value in extra_info.items(): + info_dict.setdefault(key, value) + + def extract_info(self, url, download=True, ie_key=None, extra_info=None, + process=True, force_generic_extractor=False): + """ + Extract and return the information dictionary of the URL + + Arguments: + @param url URL to extract + + Keyword arguments: + @param download Whether to download videos + @param process Whether to resolve all unresolved references (URLs, playlist items). + Must be True for download to work + @param ie_key Use only the extractor with this key + + @param extra_info Dictionary containing the extra values to add to the info (For internal use only) + @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic') + """ + + if extra_info is None: + extra_info = {} + + if not ie_key and force_generic_extractor: + ie_key = 'Generic' + + if ie_key: + ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {} + else: + ies = self._ies + + for key, ie in ies.items(): + if not ie.suitable(url): + continue + + if not ie.working(): + self.report_warning('The program functionality for this site has been marked as broken, ' + 'and will probably not work.') + + temp_id = ie.get_temp_id(url) + if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}): + self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: ' + 'has already been recorded in the archive') + if self.params.get('break_on_existing', False): + raise ExistingVideoReached() + break + return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process) + else: + extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default']) + self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}', + tb=False if extractors_restricted else None) + + def _handle_extraction_exceptions(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + while True: + try: + return func(self, *args, **kwargs) + except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): + raise + except ReExtractInfo as e: + if e.expected: + self.to_screen(f'{e}; Re-extracting data') + else: + self.to_stderr('\r') + self.report_warning(f'{e}; Re-extracting data') + continue + except GeoRestrictedError as e: + msg = e.msg + if e.countries: + msg += '\nThis video is available in %s.' 
% ', '.join( + map(ISO3166Utils.short2full, e.countries)) + msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' + self.report_error(msg) + except ExtractorError as e: # An error we somewhat expected + self.report_error(str(e), e.format_traceback()) + except Exception as e: + if self.params.get('ignoreerrors'): + self.report_error(str(e), tb=encode_compat_str(traceback.format_exc())) + else: + raise + break + return wrapper + + def _wait_for_video(self, ie_result={}): + if (not self.params.get('wait_for_video') + or ie_result.get('_type', 'video') != 'video' + or ie_result.get('formats') or ie_result.get('url')): + return + + format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1] + last_msg = '' + + def progress(msg): + nonlocal last_msg + full_msg = f'{msg}\n' + if not self.params.get('noprogress'): + full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r' + elif last_msg: + return + self.to_screen(full_msg, skip_eol=True) + last_msg = msg + + min_wait, max_wait = self.params.get('wait_for_video') + diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time()) + if diff is None and ie_result.get('live_status') == 'is_upcoming': + diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0) + self.report_warning('Release time of video is not known') + elif ie_result and (diff or 0) <= 0: + self.report_warning('Video should already be available according to extracted info') + diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf')) + self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now') + + wait_till = time.time() + diff + try: + while True: + diff = wait_till - time.time() + if diff <= 0: + progress('') + raise ReExtractInfo('[wait] Wait period ended', expected=True) + progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}') + time.sleep(1) + except KeyboardInterrupt: + progress('') + raise ReExtractInfo('[wait] Interrupted by user', expected=True) + except BaseException as e: + if not isinstance(e, ReExtractInfo): + self.to_screen('') + raise + + def _load_cookies(self, data, *, autoscope=True): + """Loads cookies from a `Cookie` header + + This tries to work around the security vulnerability of passing cookies to every domain. + See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj + + @param data The Cookie header as string to load the cookies from + @param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains + If `True`, save cookies for later to be stored in the jar with a limited scope + If a URL, save cookies in the jar with the domain of the URL + """ + for cookie in LenientSimpleCookie(data).values(): + if autoscope and any(cookie.values()): + raise ValueError('Invalid syntax in Cookie Header') + + domain = cookie.get('domain') or '' + expiry = cookie.get('expires') + if expiry == '': # 0 is valid + expiry = None + prepared_cookie = http.cookiejar.Cookie( + cookie.get('version') or 0, cookie.key, cookie.value, None, False, + domain, True, True, cookie.get('path') or '', bool(cookie.get('path')), + cookie.get('secure') or False, expiry, False, None, None, {}) + + if domain: + self.cookiejar.set_cookie(prepared_cookie) + elif autoscope is True: + self.deprecated_feature( + 'Passing cookies as a header is a potential security risk; ' + 'they will be scoped to the domain of the downloaded urls. 
' + 'Please consider loading cookies from a file or browser instead.') + self.__header_cookies.append(prepared_cookie) + elif autoscope: + self.report_warning( + 'The extractor result contains an unscoped cookie as an HTTP header. ' + f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}', + only_once=True) + self._apply_header_cookies(autoscope, [prepared_cookie]) + else: + self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping', + tb=False, is_error=False) + + def _apply_header_cookies(self, url, cookies=None): + """Applies stray header cookies to the provided url + + This loads header cookies and scopes them to the domain provided in `url`. + While this is not ideal, it helps reduce the risk of them being sent + to an unintended destination while mostly maintaining compatibility. + """ + parsed = urllib.parse.urlparse(url) + if not parsed.hostname: + return + + for cookie in map(copy.copy, cookies or self.__header_cookies): + cookie.domain = f'.{parsed.hostname}' + self.cookiejar.set_cookie(cookie) + + @_handle_extraction_exceptions + def __extract_info(self, url, ie, download, extra_info, process): + self._apply_header_cookies(url) + + try: + ie_result = ie.extract(url) + except UserNotLive as e: + if process: + if self.params.get('wait_for_video'): + self.report_warning(e) + self._wait_for_video() + raise + if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) + self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}') + return + if isinstance(ie_result, list): + # Backwards compatibility: old IE result format + ie_result = { + '_type': 'compat_list', + 'entries': ie_result, + } + if extra_info.get('original_url'): + ie_result.setdefault('original_url', extra_info['original_url']) + self.add_default_extra_info(ie_result, ie, url) + if process: + self._wait_for_video(ie_result) + return self.process_ie_result(ie_result, download, extra_info) + else: + return ie_result + + def add_default_extra_info(self, ie_result, ie, url): + if url is not None: + self.add_extra_info(ie_result, { + 'webpage_url': url, + 'original_url': url, + }) + webpage_url = ie_result.get('webpage_url') + if webpage_url: + self.add_extra_info(ie_result, { + 'webpage_url_basename': url_basename(webpage_url), + 'webpage_url_domain': get_domain(webpage_url), + }) + if ie is not None: + self.add_extra_info(ie_result, { + 'extractor': ie.IE_NAME, + 'extractor_key': ie.ie_key(), + }) + + def process_ie_result(self, ie_result, download=True, extra_info=None): + """ + Take the result of the ie(may be modified) and resolve all unresolved + references (URLs, playlist items). + + It will also download the videos if 'download'. + Returns the resolved ie_result. 
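
# A minimal sketch of the calling side (the URL is a placeholder): extract_info()
# produces the ie_result whose '_type' selects the branch taken below.
from yt_dlp import YoutubeDL

with YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info('https://example.com/some-video', download=False)
    if info:
        print(info.get('_type', 'video'))  # 'video', 'playlist', 'url', 'url_transparent', ...
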
+ """ + if extra_info is None: + extra_info = {} + result_type = ie_result.get('_type', 'video') + + if result_type in ('url', 'url_transparent'): + ie_result['url'] = sanitize_url( + ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') + if ie_result.get('original_url') and not extra_info.get('original_url'): + extra_info = {'original_url': ie_result['original_url'], **extra_info} + + extract_flat = self.params.get('extract_flat', False) + if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) + or extract_flat is True): + info_copy = ie_result.copy() + ie = try_get(ie_result.get('ie_key'), self.get_info_extractor) + if ie and not ie_result.get('id'): + info_copy['id'] = ie.get_temp_id(ie_result['url']) + self.add_default_extra_info(info_copy, ie, ie_result['url']) + self.add_extra_info(info_copy, extra_info) + info_copy, _ = self.pre_process(info_copy) + self._fill_common_fields(info_copy, False) + self.__forced_printings(info_copy) + self._raise_pending_errors(info_copy) + if self.params.get('force_write_download_archive', False): + self.record_download_archive(info_copy) + return ie_result + + if result_type == 'video': + self.add_extra_info(ie_result, extra_info) + ie_result = self.process_video_result(ie_result, download=download) + self._raise_pending_errors(ie_result) + additional_urls = (ie_result or {}).get('additional_urls') + if additional_urls: + # TODO: Improve MetadataParserPP to allow setting a list + if isinstance(additional_urls, str): + additional_urls = [additional_urls] + self.to_screen( + '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) + self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) + ie_result['additional_entries'] = [ + self.extract_info( + url, download, extra_info=extra_info, + force_generic_extractor=self.params.get('force_generic_extractor')) + for url in additional_urls + ] + return ie_result + elif result_type == 'url': + # We have to add extra_info to the results because it may be + # contained in a playlist + return self.extract_info( + ie_result['url'], download, + ie_key=ie_result.get('ie_key'), + extra_info=extra_info) + elif result_type == 'url_transparent': + # Use the information from the embedding page + info = self.extract_info( + ie_result['url'], ie_key=ie_result.get('ie_key'), + extra_info=extra_info, download=False, process=False) + + # extract_info may return None when ignoreerrors is enabled and + # extraction failed with an error, don't crash and return early + # in this case + if not info: + return info + + exempted_fields = {'_type', 'url', 'ie_key'} + if not ie_result.get('section_end') and ie_result.get('section_start') is None: + # For video clips, the id etc of the clip extractor should be used + exempted_fields |= {'id', 'extractor', 'extractor_key'} + + new_result = info.copy() + new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields)) + + # Extracted info may not be a video result (i.e. + # info.get('_type', 'video') != video) but rather an url or + # url_transparent. In such cases outer metadata (from ie_result) + # should be propagated to inner one (info). For this to happen + # _type of info should be overridden with url_transparent. This + # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163. 
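
# A rough model (not the exact code) of the url_transparent merge described above:
# non-None fields from the embedding page override the inner result, except for
# the exempted keys.
exempted_fields = {'_type', 'url', 'ie_key'}
outer = {'_type': 'url_transparent', 'url': 'inner:xyz', 'title': 'Outer title'}
inner = {'_type': 'video', 'id': 'xyz', 'title': 'Inner title'}
merged = {**inner, **{k: v for k, v in outer.items()
                      if v is not None and k not in exempted_fields}}
assert merged['title'] == 'Outer title'  # the embedding page wins
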
+ if new_result.get('_type') == 'url': + new_result['_type'] = 'url_transparent' + + return self.process_ie_result( + new_result, download=download, extra_info=extra_info) + elif result_type in ('playlist', 'multi_video'): + # Protect from infinite recursion due to recursively nested playlists + # (see https://github.com/ytdl-org/youtube-dl/issues/27833) + webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url + if webpage_url and webpage_url in self._playlist_urls: + self.to_screen( + '[download] Skipping already downloaded playlist: %s' + % ie_result.get('title') or ie_result.get('id')) + return + + self._playlist_level += 1 + self._playlist_urls.add(webpage_url) + self._fill_common_fields(ie_result, False) + self._sanitize_thumbnails(ie_result) + try: + return self.__process_playlist(ie_result, download) + finally: + self._playlist_level -= 1 + if not self._playlist_level: + self._playlist_urls.clear() + elif result_type == 'compat_list': + self.report_warning( + 'Extractor %s returned a compat_list result. ' + 'It needs to be updated.' % ie_result.get('extractor')) + + def _fixup(r): + self.add_extra_info(r, { + 'extractor': ie_result['extractor'], + 'webpage_url': ie_result['webpage_url'], + 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'webpage_url_domain': get_domain(ie_result['webpage_url']), + 'extractor_key': ie_result['extractor_key'], + }) + return r + ie_result['entries'] = [ + self.process_ie_result(_fixup(r), download, extra_info) + for r in ie_result['entries'] + ] + return ie_result + else: + raise Exception('Invalid result type: %s' % result_type) + + def _ensure_dir_exists(self, path): + return make_dir(path, self.report_error) + + @staticmethod + def _playlist_infodict(ie_result, strict=False, **kwargs): + info = { + 'playlist_count': ie_result.get('playlist_count'), + 'playlist': ie_result.get('title') or ie_result.get('id'), + 'playlist_id': ie_result.get('id'), + 'playlist_title': ie_result.get('title'), + 'playlist_uploader': ie_result.get('uploader'), + 'playlist_uploader_id': ie_result.get('uploader_id'), + **kwargs, + } + if strict: + return info + if ie_result.get('webpage_url'): + info.update({ + 'webpage_url': ie_result['webpage_url'], + 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'webpage_url_domain': get_domain(ie_result['webpage_url']), + }) + return { + **info, + 'playlist_index': 0, + '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)), + 'extractor': ie_result['extractor'], + 'extractor_key': ie_result['extractor_key'], + } + + def __process_playlist(self, ie_result, download): + """Process each entry in the playlist""" + assert ie_result['_type'] in ('playlist', 'multi_video') + + common_info = self._playlist_infodict(ie_result, strict=True) + title = common_info.get('playlist') or '' + if self._match_entry(common_info, incomplete=True) is not None: + return + self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}') + + all_entries = PlaylistEntries(self, ie_result) + entries = orderedSet(all_entries.get_requested_items(), lazy=True) + + lazy = self.params.get('lazy_playlist') + if lazy: + resolved_entries, n_entries = [], 'N/A' + ie_result['requested_entries'], ie_result['entries'] = None, None + else: + entries = resolved_entries = list(entries) + n_entries = len(resolved_entries) + ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], []) + if not ie_result.get('playlist_count'): + # Better to do this after 
potentially exhausting entries + ie_result['playlist_count'] = all_entries.get_full_count() + + extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries)) + ie_copy = collections.ChainMap(ie_result, extra) + + _infojson_written = False + write_playlist_files = self.params.get('allow_playlist_files', True) + if write_playlist_files and self.params.get('list_thumbnails'): + self.list_thumbnails(ie_result) + if write_playlist_files and not self.params.get('simulate'): + _infojson_written = self._write_info_json( + 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) + if _infojson_written is None: + return + if self._write_description('playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_description')) is None: + return + # TODO: This should be passed to ThumbnailsConvertor if necessary + self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail')) + + if lazy: + if self.params.get('playlistreverse') or self.params.get('playlistrandom'): + self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True) + elif self.params.get('playlistreverse'): + entries.reverse() + elif self.params.get('playlistrandom'): + random.shuffle(entries) + + self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items' + f'{format_field(ie_result, "playlist_count", " of %s")}') + + keep_resolved_entries = self.params.get('extract_flat') != 'discard' + if self.params.get('extract_flat') == 'discard_in_playlist': + keep_resolved_entries = ie_result['_type'] != 'playlist' + if keep_resolved_entries: + self.write_debug('The information of all playlist entries will be held in memory') + + failures = 0 + max_failures = self.params.get('skip_playlist_after_errors') or float('inf') + for i, (playlist_index, entry) in enumerate(entries): + if lazy: + resolved_entries.append((playlist_index, entry)) + if not entry: + continue + + entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip') + if not lazy and 'playlist-index' in self.params['compat_opts']: + playlist_index = ie_result['requested_entries'][i] + + entry_copy = collections.ChainMap(entry, { + **common_info, + 'n_entries': int_or_none(n_entries), + 'playlist_index': playlist_index, + 'playlist_autonumber': i + 1, + }) + + if self._match_entry(entry_copy, incomplete=True) is not None: + # For compatabilty with youtube-dl. 
See https://github.com/yt-dlp/yt-dlp/issues/4369
+                    resolved_entries[i] = (playlist_index, NO_DEFAULT)
+                    continue
+
+                self.to_screen('[download] Downloading item %s of %s' % (
+                    self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+                entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
+                    'playlist_index': playlist_index,
+                    'playlist_autonumber': i + 1,
+                }, extra))
+                if not entry_result:
+                    failures += 1
+                    if failures >= max_failures:
+                        self.report_error(
+                            f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
+                        break
+                if keep_resolved_entries:
+                    resolved_entries[i] = (playlist_index, entry_result)
+
+            # Update with processed data
+            ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
+            ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
+            if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
+                # Do not set for full playlist
+                ie_result.pop('requested_entries')
+
+            # Write the updated info to json
+            if _infojson_written is True and self._write_info_json(
+                    'updated playlist', ie_result,
+                    self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
+                return
+
+            ie_result = self.run_all_pps('playlist', ie_result)
+            self.to_screen(f'[download] Finished downloading playlist: {title}')
+            return ie_result
+
+    @_handle_extraction_exceptions
+    def __process_iterable_entry(self, entry, download, extra_info):
+        return self.process_ie_result(
+            entry, download=download, extra_info=extra_info)
+
+    def _build_format_filter(self, filter_spec):
+        " Returns a function to filter the formats according to the filter_spec "
+
+        OPERATORS = {
+            '<': operator.lt,
+            '<=': operator.le,
+            '>': operator.gt,
+            '>=': operator.ge,
+            '=': operator.eq,
+            '!=': operator.ne,
+        }
+        operator_rex = re.compile(r'''(?x)\s*
+            (?P<key>[\w.-]+)\s*
+            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
+            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+        m = operator_rex.fullmatch(filter_spec)
+        if m:
+            try:
+                comparison_value = int(m.group('value'))
+            except ValueError:
+                comparison_value = parse_filesize(m.group('value'))
+                if comparison_value is None:
+                    comparison_value = parse_filesize(m.group('value') + 'B')
+                if comparison_value is None:
+                    raise ValueError(
+                        'Invalid value %r in format specification %r' % (
+                            m.group('value'), filter_spec))
+            op = OPERATORS[m.group('op')]
+
+        if not m:
+            STR_OPERATORS = {
+                '=': operator.eq,
+                '^=': lambda attr, value: attr.startswith(value),
+                '$=': lambda attr, value: attr.endswith(value),
+                '*=': lambda attr, value: value in attr,
+                '~=': lambda attr, value: value.search(attr) is not None
+            }
+            str_operator_rex = re.compile(r'''(?x)\s*
+                (?P<key>[a-zA-Z0-9._-]+)\s*
+                (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
+                (?P<quote>["'])?
+                (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
+                (?(quote)(?P=quote))\s*
+                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+            m = str_operator_rex.fullmatch(filter_spec)
+            if m:
+                if m.group('op') == '~=':
+                    comparison_value = re.compile(m.group('value'))
+                else:
+                    comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
+                str_op = STR_OPERATORS[m.group('op')]
+                if m.group('negation'):
+                    op = lambda attr, value: not str_op(attr, value)
+                else:
+                    op = str_op
+
+        if not m:
+            raise SyntaxError('Invalid filter specification %r' % filter_spec)
+
+        def _filter(f):
+            actual_value = f.get(m.group('key'))
+            if actual_value is None:
+                return m.group('none_inclusive')
+            return op(actual_value, comparison_value)
+        return _filter
+
+    def _check_formats(self, formats):
+        for f in formats:
+            self.to_screen('[info] Testing format %s' % f['format_id'])
+            path = self.get_output_path('temp')
+            if not self._ensure_dir_exists(f'{path}/'):
+                continue
+            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
+            temp_file.close()
+            try:
+                success, _ = self.dl(temp_file.name, f, test=True)
+            except (DownloadError, OSError, ValueError) + network_exceptions:
+                success = False
+            finally:
+                if os.path.exists(temp_file.name):
+                    try:
+                        os.remove(temp_file.name)
+                    except OSError:
+                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+            if success:
+                yield f
+            else:
+                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+
+    def _default_format_spec(self, info_dict, download=True):
+
+        def can_merge():
+            merger = FFmpegMergerPP(self)
+            return merger.available and merger.can_merge()
+
+        prefer_best = (
+            not self.params.get('simulate')
+            and download
+            and (
+                not can_merge()
+                or info_dict.get('is_live') and not self.params.get('live_from_start')
+                or self.params['outtmpl']['default'] == '-'))
+        compat = (
+            prefer_best
+            or self.params.get('allow_multiple_audio_streams', False)
+            or 'format-spec' in self.params['compat_opts'])
+
+        return (
+            'best/bestvideo+bestaudio' if prefer_best
+            else 'bestvideo*+bestaudio/best' if not compat
+            else 'bestvideo+bestaudio/best')
+
+    def build_format_selector(self, format_spec):
+        def syntax_error(note, start):
+            message = (
+                'Invalid format specification: '
+                '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
+            return SyntaxError(message)
+
+        PICKFIRST = 'PICKFIRST'
+        MERGE = 'MERGE'
+        SINGLE = 'SINGLE'
+        GROUP = 'GROUP'
+        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+
+        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+                                  'video': self.params.get('allow_multiple_video_streams', False)}
+
+        def _parse_filter(tokens):
+            filter_parts = []
+            for type, string_, start, _, _ in tokens:
+                if type == tokenize.OP and string_ == ']':
+                    return ''.join(filter_parts)
+                else:
+                    filter_parts.append(string_)
+
+        def _remove_unused_ops(tokens):
+            # Remove operators that we don't use and join them with the surrounding strings.
+            # E.g.
'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + ALLOWED_OPS = ('/', '+', ',', '(', ')') + last_string, last_start, last_end, last_line = None, None, None, None + for type, string_, start, end, line in tokens: + if type == tokenize.OP and string_ == '[': + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string_, start, end, line + # everything inside brackets will be handled by _parse_filter + for type, string_, start, end, line in tokens: + yield type, string_, start, end, line + if type == tokenize.OP and string_ == ']': + break + elif type == tokenize.OP and string_ in ALLOWED_OPS: + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string_, start, end, line + elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + if not last_string: + last_string = string_ + last_start = start + last_end = end + else: + last_string += string_ + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + + def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): + selectors = [] + current_selector = None + for type, string_, start, _, _ in tokens: + # ENCODING is only defined in Python 3.x + if type == getattr(tokenize, 'ENCODING', None): + continue + elif type in [tokenize.NAME, tokenize.NUMBER]: + current_selector = FormatSelector(SINGLE, string_, []) + elif type == tokenize.OP: + if string_ == ')': + if not inside_group: + # ')' will be handled by the parentheses group + tokens.restore_last_token() + break + elif inside_merge and string_ in ['/', ',']: + tokens.restore_last_token() + break + elif inside_choice and string_ == ',': + tokens.restore_last_token() + break + elif string_ == ',': + if not current_selector: + raise syntax_error('"," must follow a format selector', start) + selectors.append(current_selector) + current_selector = None + elif string_ == '/': + if not current_selector: + raise syntax_error('"/" must follow a format selector', start) + first_choice = current_selector + second_choice = _parse_format_selection(tokens, inside_choice=True) + current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) + elif string_ == '[': + if not current_selector: + current_selector = FormatSelector(SINGLE, 'best', []) + format_filter = _parse_filter(tokens) + current_selector.filters.append(format_filter) + elif string_ == '(': + if current_selector: + raise syntax_error('Unexpected "("', start) + group = _parse_format_selection(tokens, inside_group=True) + current_selector = FormatSelector(GROUP, group, []) + elif string_ == '+': + if not current_selector: + raise syntax_error('Unexpected "+"', start) + selector_1 = current_selector + selector_2 = _parse_format_selection(tokens, inside_merge=True) + if not selector_2: + raise syntax_error('Expected a selector', start) + current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) + else: + raise syntax_error(f'Operator not recognized: "{string_}"', start) + elif type == tokenize.ENDMARKER: + break + if current_selector: + selectors.append(current_selector) + return selectors + + def _merge(formats_pair): + format_1, format_2 = formats_pair + + formats_info = [] + formats_info.extend(format_1.get('requested_formats', (format_1,))) + formats_info.extend(format_2.get('requested_formats', (format_2,))) + + if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']: + 
get_no_more = {'video': False, 'audio': False} + for (i, fmt_info) in enumerate(formats_info): + if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none': + formats_info.pop(i) + continue + for aud_vid in ['audio', 'video']: + if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': + if get_no_more[aud_vid]: + formats_info.pop(i) + break + get_no_more[aud_vid] = True + + if len(formats_info) == 1: + return formats_info[0] + + video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] + audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] + + the_only_video = video_fmts[0] if len(video_fmts) == 1 else None + the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None + + output_ext = get_compatible_ext( + vcodecs=[f.get('vcodec') for f in video_fmts], + acodecs=[f.get('acodec') for f in audio_fmts], + vexts=[f['ext'] for f in video_fmts], + aexts=[f['ext'] for f in audio_fmts], + preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) + or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) + + filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) + + new_dict = { + 'requested_formats': formats_info, + 'format': '+'.join(filtered('format')), + 'format_id': '+'.join(filtered('format_id')), + 'ext': output_ext, + 'protocol': '+'.join(map(determine_protocol, formats_info)), + 'language': '+'.join(orderedSet(filtered('language'))) or None, + 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None, + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None, + 'tbr': sum(filtered('tbr', 'vbr', 'abr')), + } + + if the_only_video: + new_dict.update({ + 'width': the_only_video.get('width'), + 'height': the_only_video.get('height'), + 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), + 'fps': the_only_video.get('fps'), + 'dynamic_range': the_only_video.get('dynamic_range'), + 'vcodec': the_only_video.get('vcodec'), + 'vbr': the_only_video.get('vbr'), + 'stretched_ratio': the_only_video.get('stretched_ratio'), + 'aspect_ratio': the_only_video.get('aspect_ratio'), + }) + + if the_only_audio: + new_dict.update({ + 'acodec': the_only_audio.get('acodec'), + 'abr': the_only_audio.get('abr'), + 'asr': the_only_audio.get('asr'), + 'audio_channels': the_only_audio.get('audio_channels') + }) + + return new_dict + + def _check_formats(formats): + if self.params.get('check_formats') == 'selected': + yield from self._check_formats(formats) + return + elif (self.params.get('check_formats') is not None + or self.params.get('allow_unplayable_formats')): + yield from formats + return + + for f in formats: + if f.get('has_drm') or f.get('__needs_testing'): + yield from self._check_formats([f]) + else: + yield f + + def _build_selector_function(selector): + if isinstance(selector, list): # , + fs = [_build_selector_function(s) for s in selector] + + def selector_function(ctx): + for f in fs: + yield from f(ctx) + return selector_function + + elif selector.type == GROUP: # () + selector_function = _build_selector_function(selector.selector) + + elif selector.type == PICKFIRST: # / + fs = [_build_selector_function(s) for s in selector.selector] + + def selector_function(ctx): + for f in fs: + picked_formats = list(f(ctx)) + if picked_formats: + return picked_formats + return [] + + elif selector.type == MERGE: # + + selector_1, selector_2 = map(_build_selector_function, 
                    selector.selector)
+
+            def selector_function(ctx):
+                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
+                    yield _merge(pair)
+
+        elif selector.type == SINGLE:  # atom
+            format_spec = selector.selector or 'best'
+
+            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
+            if format_spec == 'all':
+                def selector_function(ctx):
+                    yield from _check_formats(ctx['formats'][::-1])
+            elif format_spec == 'mergeall':
+                def selector_function(ctx):
+                    formats = list(_check_formats(
+                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
+                    if not formats:
+                        return
+                    merged_format = formats[-1]
+                    for f in formats[-2::-1]:
+                        merged_format = _merge((merged_format, f))
+                    yield merged_format
+
+            else:
+                format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
+                mobj = re.match(
+                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+                    format_spec)
+                if mobj is not None:
+                    format_idx = int_or_none(mobj.group('n'), default=1)
+                    format_reverse = mobj.group('bw')[0] == 'b'
+                    format_type = (mobj.group('type') or [None])[0]
+                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+                    format_modified = mobj.group('mod') is not None
+
+                    format_fallback = not format_type and not format_modified  # for b, w
+                    _filter_f = (
+                        (lambda f: f.get('%scodec' % format_type) != 'none')
+                        if format_type and format_modified  # bv*, ba*, wv*, wa*
+                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+                        if format_type  # bv, ba, wv, wa
+                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+                        if not format_modified  # b, w
+                        else lambda f: True)  # b*, w*
+                    filter_f = lambda f: _filter_f(f) and (
+                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
+                else:
+                    if format_spec in self._format_selection_exts['audio']:
+                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
+                    elif format_spec in self._format_selection_exts['video']:
+                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
+                        seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
+                    elif format_spec in self._format_selection_exts['storyboards']:
+                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
+                    else:
+                        filter_f = lambda f: f.get('format_id') == format_spec  # id
+
+                def selector_function(ctx):
+                    formats = list(ctx['formats'])
+                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
+                    if not matches:
+                        if format_fallback and ctx['incomplete_formats']:
+                            # for extractors with incomplete formats (audio only (soundcloud)
+                            # or video only (imgur)) best/worst will fall back to
+                            # best/worst {video,audio}-only format
+                            matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
+                        elif seperate_fallback and not ctx['has_merged_format']:
+                            # for compatibility with youtube-dl when there is no pre-merged format
+                            matches = list(filter(seperate_fallback, formats))
+                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
+                    try:
+                        yield matches[format_idx - 1]
+                    except LazyList.IndexError:
+                        return
+
+        filters = [self._build_format_filter(f) for f in selector.filters]
+
+        def final_selector(ctx):
+            ctx_copy = dict(ctx)
+            for _filter in filters:
+                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
+            return selector_function(ctx_copy)
+        return final_selector
+
+        # HACK: Python
3.12 changed the underlying parser, rendering '7_a' invalid + # Prefix numbers with random letters to avoid it being classified as a number + # See: https://github.com/yt-dlp/yt-dlp/pulls/8797 + # TODO: Implement parser not reliant on tokenize.tokenize + prefix = ''.join(random.choices(string.ascii_letters, k=32)) + stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode()) + try: + tokens = list(_remove_unused_ops( + token._replace(string=token.string.replace(prefix, '')) + for token in tokenize.tokenize(stream.readline))) + except tokenize.TokenError: + raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) + + class TokenIterator: + def __init__(self, tokens): + self.tokens = tokens + self.counter = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.counter >= len(self.tokens): + raise StopIteration() + value = self.tokens[self.counter] + self.counter += 1 + return value + + next = __next__ + + def restore_last_token(self): + self.counter -= 1 + + parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) + return _build_selector_function(parsed_selector) + + def _calc_headers(self, info_dict, load_cookies=False): + res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers')) + clean_headers(res) + + if load_cookies: # For --load-info-json + self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat + self._load_cookies(info_dict.get('cookies'), autoscope=False) + # The `Cookie` header is removed to prevent leaks and unscoped cookies. + # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj + res.pop('Cookie', None) + cookies = self.cookiejar.get_cookies_for_url(info_dict['url']) + if cookies: + encoder = LenientSimpleCookie() + values = [] + for cookie in cookies: + _, value = encoder.value_encode(cookie.value) + values.append(f'{cookie.name}={value}') + if cookie.domain: + values.append(f'Domain={cookie.domain}') + if cookie.path: + values.append(f'Path={cookie.path}') + if cookie.secure: + values.append('Secure') + if cookie.expires: + values.append(f'Expires={cookie.expires}') + if cookie.version: + values.append(f'Version={cookie.version}') + info_dict['cookies'] = '; '.join(values) + + if 'X-Forwarded-For' not in res: + x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') + if x_forwarded_for_ip: + res['X-Forwarded-For'] = x_forwarded_for_ip + + return res + + def _calc_cookies(self, url): + self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version') + return self.cookiejar.get_cookie_header(url) + + def _sort_thumbnails(self, thumbnails): + thumbnails.sort(key=lambda t: ( + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', + t.get('url'))) + + def _sanitize_thumbnails(self, info_dict): + thumbnails = info_dict.get('thumbnails') + if thumbnails is None: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] + if not thumbnails: + return + + def check_thumbnails(thumbnails): + for t in thumbnails: + self.to_screen(f'[info] Testing thumbnail {t["id"]}') + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. 
Skipping...') + continue + yield t + + self._sort_thumbnails(thumbnails) + for i, t in enumerate(thumbnails): + if t.get('id') is None: + t['id'] = '%d' % i + if t.get('width') and t.get('height'): + t['resolution'] = '%dx%d' % (t['width'], t['height']) + t['url'] = sanitize_url(t['url']) + + if self.params.get('check_formats') is True: + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True) + else: + info_dict['thumbnails'] = thumbnails + + def _fill_common_fields(self, info_dict, final=True): + # TODO: move sanitization here + if final: + title = info_dict['fulltitle'] = info_dict.get('title') + if not title: + if title == '': + self.write_debug('Extractor gave empty title. Creating a generic title') + else: + self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') + info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}' + + if info_dict.get('duration') is not None: + info_dict['duration_string'] = formatSeconds(info_dict['duration']) + + for ts_key, date_key in ( + ('timestamp', 'upload_date'), + ('release_timestamp', 'release_date'), + ('modified_timestamp', 'modified_date'), + ): + if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: + # Working around out-of-range timestamp values (e.g. negative ones on Windows, + # see http://bugs.python.org/issue1646728) + with contextlib.suppress(ValueError, OverflowError, OSError): + upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc) + info_dict[date_key] = upload_date.strftime('%Y%m%d') + + if not info_dict.get('release_year'): + info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])})) + + live_keys = ('is_live', 'was_live') + live_status = info_dict.get('live_status') + if live_status is None: + for key in live_keys: + if info_dict.get(key) is False: + continue + if info_dict.get(key): + live_status = key + break + if all(info_dict.get(key) is False for key in live_keys): + live_status = 'not_live' + if live_status: + info_dict['live_status'] = live_status + for key in live_keys: + if info_dict.get(key) is None: + info_dict[key] = (live_status == key) + if live_status == 'post_live': + info_dict['was_live'] = True + + # Auto generate title fields corresponding to the *_number fields when missing + # in order to always have clean titles. This is very common for TV series. 
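+        # For example, an entry with season_number=2 and no 'season' field gets
+        # season = 'Season 2'; episode_number=5 likewise yields 'Episode 5'.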
+        for field in ('chapter', 'season', 'episode'):
+            if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
+        for old_key, new_key in self._deprecated_multivalue_fields.items():
+            if new_key in info_dict and old_key in info_dict:
+                if '_version' not in info_dict:  # HACK: Do not warn when using --load-info-json
+                    self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
+            elif old_value := info_dict.get(old_key):
+                info_dict[new_key] = old_value.split(', ')
+            elif new_value := info_dict.get(new_key):
+                info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
+
+    def _raise_pending_errors(self, info):
+        err = info.pop('__pending_error', None)
+        if err:
+            self.report_error(err, tb=False)
+
+    def sort_formats(self, info_dict):
+        formats = self._get_formats(info_dict)
+        formats.sort(key=FormatSorter(
+            self, info_dict.get('_format_sort_fields') or []).calculate_preference)
+
+    def process_video_result(self, info_dict, download=True):
+        assert info_dict.get('_type', 'video') == 'video'
+        self._num_videos += 1
+
+        if 'id' not in info_dict:
+            raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
+        elif not info_dict.get('id'):
+            raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
+
+        def report_force_conversion(field, field_not, conversion):
+            self.report_warning(
+                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+                % (field, field_not, conversion))
+
+        def sanitize_string_field(info, string_field):
+            field = info.get(string_field)
+            if field is None or isinstance(field, str):
+                return
+            report_force_conversion(string_field, 'a string', 'string')
+            info[string_field] = str(field)
+
+        def sanitize_numeric_fields(info):
+            for numeric_field in self._NUMERIC_FIELDS:
+                field = info.get(numeric_field)
+                if field is None or isinstance(field, (int, float)):
+                    continue
+                report_force_conversion(numeric_field, 'numeric', 'int')
+                info[numeric_field] = int_or_none(field)
+
+        sanitize_string_field(info_dict, 'id')
+        sanitize_numeric_fields(info_dict)
+        if info_dict.get('section_end') and info_dict.get('section_start') is not None:
+            info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
+        if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
+            self.report_warning('"duration" field is negative, there is an error in extractor')
+
+        chapters = info_dict.get('chapters') or []
+        if chapters and chapters[0].get('start_time'):
+            chapters.insert(0, {'start_time': 0})
+
+        dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
+        for idx, (prev, current, next_) in enumerate(zip(
+                (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
+            if current.get('start_time') is None:
+                current['start_time'] = prev.get('end_time')
+            if not current.get('end_time'):
+                current['end_time'] = next_.get('start_time')
+            if not current.get('title'):
+                current['title'] = f'<Untitled Chapter {idx}>'
+
+        if 'playlist' not in info_dict:
+            # It isn't part of a playlist
+            info_dict['playlist'] = None
+            info_dict['playlist_index'] = None
+
+        self._sanitize_thumbnails(info_dict)
+
+        thumbnail = info_dict.get('thumbnail')
+        thumbnails = info_dict.get('thumbnails')
+        if thumbnail:
+            info_dict['thumbnail'] = sanitize_url(thumbnail)
+        elif thumbnails:
+            info_dict['thumbnail'] = thumbnails[-1]['url']
+
+        if
info_dict.get('display_id') is None and 'id' in info_dict: + info_dict['display_id'] = info_dict['id'] + + self._fill_common_fields(info_dict) + + for cc_kind in ('subtitles', 'automatic_captions'): + cc = info_dict.get(cc_kind) + if cc: + for _, subtitle in cc.items(): + for subtitle_format in subtitle: + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) + if subtitle_format.get('ext') is None: + subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() + + automatic_captions = info_dict.get('automatic_captions') + subtitles = info_dict.get('subtitles') + + info_dict['requested_subtitles'] = self.process_subtitles( + info_dict['id'], subtitles, automatic_captions) + + formats = self._get_formats(info_dict) + + # Backward compatibility with InfoExtractor._sort_formats + field_preference = (formats or [{}])[0].pop('__sort_fields', None) + if field_preference: + info_dict['_format_sort_fields'] = field_preference + + info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it + f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None + if not self.params.get('allow_unplayable_formats'): + formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe'] + + if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats): + self.report_warning( + f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}' + 'only images are available for download. Use --list-formats to see them'.capitalize()) + + get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start')) + if not get_from_start: + info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + if info_dict.get('is_live') and formats: + formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] + if get_from_start and not formats: + self.raise_no_formats(info_dict, msg=( + '--live-from-start is passed, but there are no formats that can be downloaded from the start. 
' + 'If you want to download from the current time, use --no-live-from-start')) + + def is_wellformed(f): + url = f.get('url') + if not url: + self.report_warning( + '"url" field is missing or empty - skipping format, ' + 'there is an error in extractor') + return False + if isinstance(url, bytes): + sanitize_string_field(f, 'url') + return True + + # Filter out malformed formats for better extraction robustness + formats = list(filter(is_wellformed, formats or [])) + + if not formats: + self.raise_no_formats(info_dict) + + for format in formats: + sanitize_string_field(format, 'format_id') + sanitize_numeric_fields(format) + format['url'] = sanitize_url(format['url']) + if format.get('ext') is None: + format['ext'] = determine_ext(format['url']).lower() + if format.get('protocol') is None: + format['protocol'] = determine_protocol(format) + if format.get('resolution') is None: + format['resolution'] = self.format_resolution(format, default=None) + if format.get('dynamic_range') is None and format.get('vcodec') != 'none': + format['dynamic_range'] = 'SDR' + if format.get('aspect_ratio') is None: + format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + # For fragmented formats, "tbr" is often max bitrate and not average + if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url')) + and info_dict.get('duration') and format.get('tbr') + and not format.get('filesize') and not format.get('filesize_approx')): + format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) + format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True) + + # Safeguard against old/insecure infojson when using --load-info-json + if info_dict.get('http_headers'): + info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers']) + info_dict['http_headers'].pop('Cookie', None) + + # This is copied to http_headers by the above _calc_headers and can now be removed + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] + + self.sort_formats({ + 'formats': formats, + '_format_sort_fields': info_dict.get('_format_sort_fields') + }) + + # Sanitize and group by format_id + formats_dict = {} + for i, format in enumerate(formats): + if not format.get('format_id'): + format['format_id'] = str(i) + else: + # Sanitize format_id from characters used in format selector expression + format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) + formats_dict.setdefault(format['format_id'], []).append(format) + + # Make sure all formats have unique format_id + common_exts = set(itertools.chain(*self._format_selection_exts.values())) + for format_id, ambiguous_formats in formats_dict.items(): + ambigious_id = len(ambiguous_formats) > 1 + for i, format in enumerate(ambiguous_formats): + if ambigious_id: + format['format_id'] = '%s-%d' % (format_id, i) + # Ensure there is no conflict between id and ext in format selection + # See https://github.com/yt-dlp/yt-dlp/issues/1282 + if format['format_id'] != format['ext'] and format['format_id'] in common_exts: + format['format_id'] = 'f%s' % format['format_id'] + + if format.get('format') is None: + format['format'] = '{id} - {res}{note}'.format( + id=format['format_id'], + res=self.format_resolution(format), + note=format_field(format, 'format_note', ' (%s)'), + ) + + if self.params.get('check_formats') is True: + formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + + if not formats or formats[0] 
is not info_dict: + # only set the 'formats' fields if the original info_dict list them + # otherwise we end up with a circular reference, the first (and unique) + # element in the 'formats' field in info_dict is info_dict itself, + # which can't be exported to json + info_dict['formats'] = formats + + info_dict, _ = self.pre_process(info_dict) + + if self._match_entry(info_dict, incomplete=self._format_fields) is not None: + return info_dict + + self.post_extract(info_dict) + info_dict, _ = self.pre_process(info_dict, 'after_filter') + + # The pre-processors may have modified the formats + formats = self._get_formats(info_dict) + + list_only = self.params.get('simulate') == 'list_only' + interactive_format_selection = not list_only and self.format_selector == '-' + if self.params.get('list_thumbnails'): + self.list_thumbnails(info_dict) + if self.params.get('listsubtitles'): + if 'automatic_captions' in info_dict: + self.list_subtitles( + info_dict['id'], automatic_captions, 'automatic captions') + self.list_subtitles(info_dict['id'], subtitles, 'subtitles') + if self.params.get('listformats') or interactive_format_selection: + self.list_formats(info_dict) + if list_only: + # Without this printing, -F --print-json will not work + self.__forced_printings(info_dict) + return info_dict + + format_selector = self.format_selector + while True: + if interactive_format_selection: + req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS) + + '(Press ENTER for default, or Ctrl+C to quit)' + + self._format_screen(': ', self.Styles.EMPHASIS)) + try: + format_selector = self.build_format_selector(req_format) if req_format else None + except SyntaxError as err: + self.report_error(err, tb=False, is_error=False) + continue + + if format_selector is None: + req_format = self._default_format_spec(info_dict, download=download) + self.write_debug(f'Default format spec: {req_format}') + format_selector = self.build_format_selector(req_format) + + formats_to_download = list(format_selector({ + 'formats': formats, + 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats), + 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video + or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio + })) + if interactive_format_selection and not formats_to_download: + self.report_error('Requested format is not available', tb=False, is_error=False) + continue + break + + if not formats_to_download: + if not self.params.get('ignore_no_formats_error'): + raise ExtractorError( + 'Requested format is not available. Use --list-formats for a list of available formats', + expected=True, video_id=info_dict['id'], ie=info_dict['extractor']) + self.report_warning('Requested format is not available') + # Process what we can, even without any available formats. 
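+            # The single empty dict below acts as a placeholder "format": it lets the
+            # rest of the pipeline (pre-processing, forced printing, info-json
+            # writing) still run even though nothing is actually downloadable.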
+ formats_to_download = [{}] + + requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self)) + best_format, downloaded_formats = formats_to_download[-1], [] + if download: + if best_format and requested_ranges: + def to_screen(*msg): + self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}') + + to_screen(f'Downloading {len(formats_to_download)} format(s):', + (f['format_id'] for f in formats_to_download)) + if requested_ranges != ({}, ): + to_screen(f'Downloading {len(requested_ranges)} time ranges:', + (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges)) + max_downloads_reached = False + + for fmt, chapter in itertools.product(formats_to_download, requested_ranges): + new_info = self._copy_infodict(info_dict) + new_info.update(fmt) + offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf') + end_time = offset + min(chapter.get('end_time', duration), duration) + # duration may not be accurate. So allow deviations <1sec + if end_time == float('inf') or end_time > offset + duration + 1: + end_time = None + if chapter or offset: + new_info.update({ + 'section_start': offset + chapter.get('start_time', 0), + 'section_end': end_time, + 'section_title': chapter.get('title'), + 'section_number': chapter.get('index'), + }) + downloaded_formats.append(new_info) + try: + self.process_info(new_info) + except MaxDownloadsReached: + max_downloads_reached = True + self._raise_pending_errors(new_info) + # Remove copied info + for key, val in tuple(new_info.items()): + if info_dict.get(key) == val: + new_info.pop(key) + if max_downloads_reached: + break + + write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats} + assert write_archive.issubset({True, False, 'ignore'}) + if True in write_archive and False not in write_archive: + self.record_download_archive(info_dict) + + info_dict['requested_downloads'] = downloaded_formats + info_dict = self.run_all_pps('after_video', info_dict) + if max_downloads_reached: + raise MaxDownloadsReached() + + # We update the info dict with the selected best quality format (backwards compatibility) + info_dict.update(best_format) + return info_dict + + def process_subtitles(self, video_id, normal_subtitles, automatic_captions): + """Select the requested subtitles and their format""" + available_subs, normal_sub_langs = {}, [] + if normal_subtitles and self.params.get('writesubtitles'): + available_subs.update(normal_subtitles) + normal_sub_langs = tuple(normal_subtitles.keys()) + if automatic_captions and self.params.get('writeautomaticsub'): + for lang, cap_info in automatic_captions.items(): + if lang not in available_subs: + available_subs[lang] = cap_info + + if not available_subs or ( + not self.params.get('writesubtitles') + and not self.params.get('writeautomaticsub')): + return None + + all_sub_langs = tuple(available_subs.keys()) + if self.params.get('allsubtitles', False): + requested_langs = all_sub_langs + elif self.params.get('subtitleslangs', False): + try: + requested_langs = orderedSet_from_options( + self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True) + except re.error as e: + raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') + else: + requested_langs = LazyList(itertools.chain( + ['en'] if 'en' in normal_sub_langs else [], + filter(lambda f: f.startswith('en'), normal_sub_langs), + ['en'] if 'en' in all_sub_langs else [], + filter(lambda f: 
                f.startswith('en'), all_sub_langs),
+                normal_sub_langs, all_sub_langs,
+            ))[:1]
+        if requested_langs:
+            self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
+
+        formats_query = self.params.get('subtitlesformat', 'best')
+        formats_preference = formats_query.split('/') if formats_query else []
+        subs = {}
+        for lang in requested_langs:
+            formats = available_subs.get(lang)
+            if formats is None:
+                self.report_warning(f'{lang} subtitles not available for {video_id}')
+                continue
+            for ext in formats_preference:
+                if ext == 'best':
+                    f = formats[-1]
+                    break
+                matches = list(filter(lambda f: f['ext'] == ext, formats))
+                if matches:
+                    f = matches[-1]
+                    break
+            else:
+                f = formats[-1]
+                self.report_warning(
+                    'No subtitle format found matching "%s" for language %s, '
+                    'using %s' % (formats_query, lang, f['ext']))
+            subs[lang] = f
+        return subs
+
+    def _forceprint(self, key, info_dict):
+        if info_dict is None:
+            return
+        info_copy = info_dict.copy()
+        info_copy.setdefault('filename', self.prepare_filename(info_dict))
+        if info_dict.get('requested_formats') is not None:
+            # For RTMP URLs, also include the playpath
+            info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
+        elif info_dict.get('url'):
+            info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
+        info_copy['formats_table'] = self.render_formats_table(info_dict)
+        info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
+        info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
+        info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
+
+        def format_tmpl(tmpl):
+            mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
+            if not mobj:
+                return tmpl
+
+            fmt = '%({})s'
+            if tmpl.startswith('{'):
+                tmpl, fmt = f'.{tmpl}', '%({})j'
+            if tmpl.endswith('='):
+                tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
+            return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
+
+        for tmpl in self.params['forceprint'].get(key, []):
+            self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
+
+        for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
+            filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
+            tmpl = format_tmpl(tmpl)
+            self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
+            if self._ensure_dir_exists(filename):
+                with open(filename, 'a', encoding='utf-8', newline='') as f:
+                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
+
+        return info_copy
+
+    def __forced_printings(self, info_dict, filename=None, incomplete=True):
+        if (self.params.get('forcejson')
+                or self.params['forceprint'].get('video')
+                or self.params['print_to_file'].get('video')):
+            self.post_extract(info_dict)
+        if filename:
+            info_dict['filename'] = filename
+        info_copy = self._forceprint('video', info_dict)
+
+        def print_field(field, actual_field=None, optional=False):
+            if actual_field is None:
+                actual_field = field
+            if self.params.get(f'force{field}') and (
+                    info_copy.get(field) is not None or (not optional and not incomplete)):
+                self.to_stdout(info_copy[actual_field])
+
+        print_field('title')
+        print_field('id')
+        print_field('url', 'urls')
+        print_field('thumbnail', optional=True)
+        print_field('description', optional=True)
+        print_field('filename')
+        if self.params.get('forceduration') and info_copy.get('duration') is not None:
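+            # 'duration' is special-cased rather than routed through print_field():
+            # --get-duration prints the value formatted by formatSeconds()
+            # (e.g. 123 -> '2:03'), not the raw numeric field.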
self.to_stdout(formatSeconds(info_copy['duration'])) + print_field('format') + + if self.params.get('forcejson'): + self.to_stdout(json.dumps(self.sanitize_info(info_dict))) + + def dl(self, name, info, subtitle=False, test=False): + if not info.get('url'): + self.raise_no_formats(info, True) + + if test: + verbose = self.params.get('verbose') + params = { + 'test': True, + 'quiet': self.params.get('quiet') or not verbose, + 'verbose': verbose, + 'noprogress': not verbose, + 'nopart': True, + 'skip_unavailable_fragments': False, + 'keep_fragments': False, + 'overwrites': True, + '_no_ytdl_file': True, + } + else: + params = self.params + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) + if not test: + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + urls = '", "'.join( + (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url']) + for f in info.get('requested_formats', []) or [info]) + self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"') + + # Note: Ideally info should be a deep-copied so that hooks cannot modify it. + # But it may contain objects that are not deep-copyable + new_info = self._copy_infodict(info) + if new_info.get('http_headers') is None: + new_info['http_headers'] = self._calc_headers(new_info) + return fd.download(name, new_info, subtitle) + + def existing_file(self, filepaths, *, default_overwrite=True): + existing_files = list(filter(os.path.exists, orderedSet(filepaths))) + if existing_files and not self.params.get('overwrites', default_overwrite): + return existing_files[0] + + for file in existing_files: + self.report_file_delete(file) + os.remove(file) + return None + + def process_info(self, info_dict): + """Process a single resolved IE result. (Modifies it in-place)""" + + assert info_dict.get('_type', 'video') == 'video' + original_infodict = info_dict + + if 'format' not in info_dict and 'ext' in info_dict: + info_dict['format'] = info_dict['ext'] + + if self._match_entry(info_dict) is not None: + info_dict['__write_download_archive'] = 'ignore' + return + + # Does nothing under normal operation - for backward compatibility of process_info + self.post_extract(info_dict) + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + new_info, _ = self.pre_process(info_dict, 'video') + replace_info_dict(new_info) + self._num_downloads += 1 + + # info_dict['_filename'] needs to be set for backward compatibility + info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) + temp_filename = self.prepare_filename(info_dict, 'temp') + files_to_move = {} + + # Forced printings + self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) + + def check_max_downloads(): + if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'): + raise MaxDownloadsReached() + + if self.params.get('simulate'): + info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') + check_max_downloads() + return + + if full_filename is None: + return + if not self._ensure_dir_exists(encodeFilename(full_filename)): + return + if not self._ensure_dir_exists(encodeFilename(temp_filename)): + return + + if self._write_description('video', info_dict, + self.prepare_filename(info_dict, 'description')) is None: + return + + sub_files = self._write_subtitles(info_dict, temp_filename) + if sub_files is None: + return + 
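+        # _write_subtitles() returns (current_path, final_path) pairs; collecting
+        # them into files_to_move lets MoveFilesAfterDownloadPP move the side
+        # files into place once the download itself has finished.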
files_to_move.update(dict(sub_files)) + + thumb_files = self._write_thumbnails( + 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail')) + if thumb_files is None: + return + files_to_move.update(dict(thumb_files)) + + infofn = self.prepare_filename(info_dict, 'infojson') + _infojson_written = self._write_info_json('video', info_dict, infofn) + if _infojson_written: + info_dict['infojson_filename'] = infofn + # For backward compatibility, even though it was a private field + info_dict['__infojson_filename'] = infofn + elif _infojson_written is None: + return + + # Note: Annotations are deprecated + annofn = None + if self.params.get('writeannotations', False): + annofn = self.prepare_filename(info_dict, 'annotation') + if annofn: + if not self._ensure_dir_exists(encodeFilename(annofn)): + return + if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): + self.to_screen('[info] Video annotations are already present') + elif not info_dict.get('annotations'): + self.report_warning('There are no annotations to write.') + else: + try: + self.to_screen('[info] Writing video annotations to: ' + annofn) + with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + annofile.write(info_dict['annotations']) + except (KeyError, TypeError): + self.report_warning('There are no annotations to write.') + except OSError: + self.report_error('Cannot write annotations file: ' + annofn) + return + + # Write internet shortcut files + def _write_link_file(link_type): + url = try_get(info_dict['webpage_url'], iri_to_uri) + if not url: + self.report_warning( + f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') + return True + linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) + if not self._ensure_dir_exists(encodeFilename(linkfn)): + return False + if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): + self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') + return True + try: + self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') + with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + newline='\r\n' if link_type == 'url' else '\n') as linkfile: + template_vars = {'url': url} + if link_type == 'desktop': + template_vars['filename'] = linkfn[:-(len(link_type) + 1)] + linkfile.write(LINK_TEMPLATES[link_type] % template_vars) + except OSError: + self.report_error(f'Cannot write internet shortcut {linkfn}') + return False + return True + + write_links = { + 'url': self.params.get('writeurllink'), + 'webloc': self.params.get('writewebloclink'), + 'desktop': self.params.get('writedesktoplink'), + } + if self.params.get('writelink'): + link_type = ('webloc' if sys.platform == 'darwin' + else 'desktop' if sys.platform.startswith('linux') + else 'url') + write_links[link_type] = True + + if any(should_write and not _write_link_file(link_type) + for link_type, should_write in write_links.items()): + return + + new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) + replace_info_dict(new_info) + + if self.params.get('skip_download'): + info_dict['filepath'] = temp_filename + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + info_dict['__files_to_move'] = files_to_move + replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) + 
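+            # Even when the download is skipped, --force-write-archive is still
+            # honoured below, and MoveFilesAfterDownloadPP above has already moved
+            # side files (info-json, thumbnails, subtitles) to their final paths.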
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') + else: + # Download + info_dict.setdefault('__postprocessors', []) + try: + + def existing_video_file(*filepaths): + ext = info_dict.get('ext') + converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext) + file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)), + default_overwrite=False) + if file: + info_dict['ext'] = os.path.splitext(file)[1][1:] + return file + + fd, success = None, True + if info_dict.get('protocol') or info_dict.get('url'): + fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') + if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and ( + info_dict.get('section_start') or info_dict.get('section_end')): + msg = ('This format cannot be partially downloaded' if FFmpegFD.available() + else 'You have requested downloading the video partially, but ffmpeg is not installed') + self.report_error(f'{msg}. Aborting') + return + + if info_dict.get('requested_formats') is not None: + old_ext = info_dict['ext'] + if self.params.get('merge_output_format') is None: + if (info_dict['ext'] == 'webm' + and info_dict.get('thumbnails') + # check with type instead of pp_key, __name__, or isinstance + # since we dont want any custom PPs to trigger this + and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721 + info_dict['ext'] = 'mkv' + self.report_warning( + 'webm doesn\'t support embedding a thumbnail, mkv will be used') + new_ext = info_dict['ext'] + + def correct_ext(filename, ext=new_ext): + if filename == '-': + return filename + filename_real_ext = os.path.splitext(filename)[1][1:] + filename_wo_ext = ( + os.path.splitext(filename)[0] + if filename_real_ext in (old_ext, new_ext) + else filename) + return f'{filename_wo_ext}.{ext}' + + # Ensure filename always has a correct extension for successful merge + full_filename = correct_ext(full_filename) + temp_filename = correct_ext(temp_filename) + dl_filename = existing_video_file(full_filename, temp_filename) + + info_dict['__real_download'] = False + # NOTE: Copy so that original format dicts are not modified + info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats'])) + + merger = FFmpegMergerPP(self) + downloaded = [] + if dl_filename is not None: + self.report_file_already_downloaded(dl_filename) + elif fd: + for f in info_dict['requested_formats'] if fd != FFmpegFD else []: + f['filepath'] = fname = prepend_extension( + correct_ext(temp_filename, info_dict['ext']), + 'f%s' % f['format_id'], info_dict['ext']) + downloaded.append(fname) + info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) + success, real_download = self.dl(temp_filename, info_dict) + info_dict['__real_download'] = real_download + else: + if self.params.get('allow_unplayable_formats'): + self.report_warning( + 'You have requested merging of multiple formats ' + 'while also allowing unplayable formats to be downloaded. ' + 'The formats won\'t be merged to prevent data corruption.') + elif not merger.available: + msg = 'You have requested merging of multiple formats but ffmpeg is not installed' + if not self.params.get('ignoreerrors'): + self.report_error(f'{msg}. Aborting due to --abort-on-error') + return + self.report_warning(f'{msg}. 
The formats won\'t be merged') + + if temp_filename == '-': + reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params) + else 'but the formats are incompatible for simultaneous download' if merger.available + else 'but ffmpeg is not installed') + self.report_warning( + f'You have requested downloading multiple formats to stdout {reason}. ' + 'The formats will be streamed one after the other') + fname = temp_filename + for f in info_dict['requested_formats']: + new_info = dict(info_dict) + del new_info['requested_formats'] + new_info.update(f) + if temp_filename != '-': + fname = prepend_extension( + correct_ext(temp_filename, new_info['ext']), + 'f%s' % f['format_id'], new_info['ext']) + if not self._ensure_dir_exists(fname): + return + f['filepath'] = fname + downloaded.append(fname) + partial_success, real_download = self.dl(fname, new_info) + info_dict['__real_download'] = info_dict['__real_download'] or real_download + success = success and partial_success + + if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): + info_dict['__postprocessors'].append(merger) + info_dict['__files_to_merge'] = downloaded + # Even if there were no downloads, it is being merged only now + info_dict['__real_download'] = True + else: + for file in downloaded: + files_to_move[file] = None + else: + # Just a single file + dl_filename = existing_video_file(full_filename, temp_filename) + if dl_filename is None or dl_filename == temp_filename: + # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part. + # So we should try to resume the download + success, real_download = self.dl(temp_filename, info_dict) + info_dict['__real_download'] = real_download + else: + self.report_file_already_downloaded(dl_filename) + + dl_filename = dl_filename or temp_filename + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + + except network_exceptions as err: + self.report_error('unable to download video data: %s' % error_to_compat_str(err)) + return + except OSError as err: + raise UnavailableVideoError(err) + except (ContentTooShortError, ) as err: + self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') + return + + self._raise_pending_errors(info_dict) + if success and full_filename != '-': + + def fixup(): + do_fixup = True + fixup_policy = self.params.get('fixup') + vid = info_dict['id'] + + if fixup_policy in ('ignore', 'never'): + return + elif fixup_policy == 'warn': + do_fixup = 'warn' + elif fixup_policy != 'force': + assert fixup_policy in ('detect_or_warn', None) + if not info_dict.get('__real_download'): + do_fixup = False + + def ffmpeg_fixup(cndn, msg, cls): + if not (do_fixup and cndn): + return + elif do_fixup == 'warn': + self.report_warning(f'{vid}: {msg}') + return + pp = cls(self) + if pp.available: + info_dict['__postprocessors'].append(pp) + else: + self.report_warning(f'{vid}: {msg}. 
Install ffmpeg to fix this automatically') + + stretched_ratio = info_dict.get('stretched_ratio') + ffmpeg_fixup(stretched_ratio not in (1, None), + f'Non-uniform pixel ratio {stretched_ratio}', + FFmpegFixupStretchedPP) + + downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None + downloader = downloader.FD_NAME if downloader else None + + ext = info_dict.get('ext') + postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any(( + isinstance(pp, FFmpegVideoConvertorPP) + and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None) + ) for pp in self._pps['post_process']) + + if not postprocessed_by_ffmpeg: + ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a' + and info_dict.get('container') == 'm4a_dash', + 'writing DASH m4a. Only some players support this container', + FFmpegFixupM4aPP) + ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') + or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, + 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', + FFmpegFixupM3u8PP) + ffmpeg_fixup(downloader == 'dashsegments' + and (info_dict.get('is_live') or info_dict.get('is_dash_periods')), + 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) + + ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) + ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP) + + fixup() + try: + replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) + except PostProcessingError as err: + self.report_error('Postprocessing: %s' % str(err)) + return + try: + for ph in self._post_hooks: + ph(info_dict['filepath']) + except Exception as err: + self.report_error('post hooks: %s' % str(err)) + return + info_dict['__write_download_archive'] = True + + assert info_dict is original_infodict # Make sure the info_dict was modified in-place + if self.params.get('force_write_download_archive'): + info_dict['__write_download_archive'] = True + check_max_downloads() + + def __download_wrapper(self, func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + res = func(*args, **kwargs) + except UnavailableVideoError as e: + self.report_error(e) + except DownloadCancelled as e: + self.to_screen(f'[info] {e}') + if not self.params.get('break_per_url'): + raise + self._num_downloads = 0 + else: + if self.params.get('dump_single_json', False): + self.post_extract(res) + self.to_stdout(json.dumps(self.sanitize_info(res))) + return wrapper + + def download(self, url_list): + """Download a given list of URLs.""" + url_list = variadic(url_list) # Passing a single URL is a common mistake + outtmpl = self.params['outtmpl']['default'] + if (len(url_list) > 1 + and outtmpl != '-' + and '%' not in outtmpl + and self.params.get('max_downloads') != 1): + raise SameFileError(outtmpl) + + for url in url_list: + self.__download_wrapper(self.extract_info)( + url, force_generic_extractor=self.params.get('force_generic_extractor', False)) + + return self._download_retcode + + def download_with_info_file(self, info_filename): + with contextlib.closing(fileinput.FileInput( + [info_filename], mode='r', + openhook=fileinput.hook_encoded('utf-8'))) as f: + # FileInput doesn't have a read method, we can't call json.load + infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) + for info in variadic(json.loads('\n'.join(f)))] + for info in infos: + try: + 
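+                # Usage sketch (file name is illustrative; this method backs the
+                # --load-info-json option):
+                #   with YoutubeDL(params) as ydl:
+                #       ydl.download_with_info_file('video.info.json')
+                # Each entry was sanitized above and is re-processed here as a
+                # fresh IE result.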
self.__download_wrapper(self.process_ie_result)(info, download=True) + except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') + webpage_url = info.get('webpage_url') + if webpage_url is None: + raise + self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') + self.download([webpage_url]) + except ExtractorError as e: + self.report_error(e) + return self._download_retcode + + @staticmethod + def sanitize_info(info_dict, remove_private_keys=False): + ''' Sanitize the infodict for converting to json ''' + if info_dict is None: + return info_dict + info_dict.setdefault('epoch', int(time.time())) + info_dict.setdefault('_type', 'video') + info_dict.setdefault('_version', { + 'version': __version__, + 'current_git_head': current_git_head(), + 'release_git_head': RELEASE_GIT_HEAD, + 'repository': ORIGIN, + }) + + if remove_private_keys: + reject = lambda k, v: v is None or k.startswith('__') or k in { + 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', + 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url', + 'playlist_autonumber', + } + else: + reject = lambda k, v: False + + def filter_fn(obj): + if isinstance(obj, dict): + return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} + elif isinstance(obj, (list, tuple, set, LazyList)): + return list(map(filter_fn, obj)) + elif obj is None or isinstance(obj, (str, int, float, bool)): + return obj + else: + return repr(obj) + + return filter_fn(info_dict) + + @staticmethod + def filter_requested_info(info_dict, actually_filter=True): + ''' Alias of sanitize_info for backward compatibility ''' + return YoutubeDL.sanitize_info(info_dict, actually_filter) + + def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): + for filename in set(filter(None, files_to_delete)): + if msg: + self.to_screen(msg % filename) + try: + os.remove(filename) + except OSError: + self.report_warning(f'Unable to delete file {filename}') + if filename in info.get('__files_to_move', []): # NB: Delete even if None + del info['__files_to_move'][filename] + + @staticmethod + def post_extract(info_dict): + def actual_post_extract(info_dict): + if info_dict.get('_type') in ('playlist', 'multi_video'): + for video_dict in info_dict.get('entries', {}): + actual_post_extract(video_dict or {}) + return + + post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) + info_dict.update(post_extractor()) + + actual_post_extract(info_dict or {}) + + def run_pp(self, pp, infodict): + files_to_delete = [] + if '__files_to_move' not in infodict: + infodict['__files_to_move'] = {} + try: + files_to_delete, infodict = pp.run(infodict) + except PostProcessingError as e: + # Must be True and not 'only_download' + if self.params.get('ignoreerrors') is True: + self.report_error(e) + return infodict + raise + + if not files_to_delete: + return infodict + if self.params.get('keepvideo', False): + for f in files_to_delete: + infodict['__files_to_move'].setdefault(f, '') + else: + self._delete_downloaded_files( + *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)') + return infodict + + def run_all_pps(self, key, info, *, additional_pps=None): + if key != 'video': + self._forceprint(key, info) + for pp in (additional_pps or []) + self._pps[key]: + info = self.run_pp(pp, info) + return info + + def pre_process(self, ie_info, key='pre_process', 
files_to_move=None): + info = dict(ie_info) + info['__files_to_move'] = files_to_move or {} + try: + info = self.run_all_pps(key, info) + except PostProcessingError as err: + msg = f'Preprocessing: {err}' + info.setdefault('__pending_error', msg) + self.report_error(msg, is_error=False) + return info, info.pop('__files_to_move', None) + + def post_process(self, filename, info, files_to_move=None): + """Run all the postprocessors on the given file.""" + info['filepath'] = filename + info['__files_to_move'] = files_to_move or {} + info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors')) + info = self.run_pp(MoveFilesAfterDownloadPP(self), info) + del info['__files_to_move'] + return self.run_all_pps('after_move', info) + + def _make_archive_id(self, info_dict): + video_id = info_dict.get('id') + if not video_id: + return + # Future-proof against any change in case + # and backwards compatibility with prior versions + extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist + if extractor is None: + url = str_or_none(info_dict.get('url')) + if not url: + return + # Try to find matching extractor for the URL and take its ie_key + for ie_key, ie in self._ies.items(): + if ie.suitable(url): + extractor = ie_key + break + else: + return + return make_archive_id(extractor, video_id) + + def in_download_archive(self, info_dict): + if not self.archive: + return False + + vid_ids = [self._make_archive_id(info_dict)] + vid_ids.extend(info_dict.get('_old_archive_ids') or []) + return any(id_ in self.archive for id_ in vid_ids) + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = self._make_archive_id(info_dict) + assert vid_id + + self.write_debug(f'Adding to archive: {vid_id}') + if is_path_like(fn): + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + '\n') + self.archive.add(vid_id) + + @staticmethod + def format_resolution(format, default='unknown'): + if format.get('vcodec') == 'none' and format.get('acodec') != 'none': + return 'audio only' + if format.get('resolution') is not None: + return format['resolution'] + if format.get('width') and format.get('height'): + return '%dx%d' % (format['width'], format['height']) + elif format.get('height'): + return '%sp' % format['height'] + elif format.get('width'): + return '%dx?' 
% format['width'] + return default + + def _list_format_headers(self, *headers): + if self.params.get('listformats_table', True) is not False: + return [self._format_out(header, self.Styles.HEADERS) for header in headers] + return headers + + def _format_note(self, fdict): + res = '' + if fdict.get('ext') in ['f4f', 'f4m']: + res += '(unsupported)' + if fdict.get('language'): + if res: + res += ' ' + res += '[%s]' % fdict['language'] + if fdict.get('format_note') is not None: + if res: + res += ' ' + res += fdict['format_note'] + if fdict.get('tbr') is not None: + if res: + res += ', ' + res += '%4dk' % fdict['tbr'] + if fdict.get('container') is not None: + if res: + res += ', ' + res += '%s container' % fdict['container'] + if (fdict.get('vcodec') is not None + and fdict.get('vcodec') != 'none'): + if res: + res += ', ' + res += fdict['vcodec'] + if fdict.get('vbr') is not None: + res += '@' + elif fdict.get('vbr') is not None and fdict.get('abr') is not None: + res += 'video@' + if fdict.get('vbr') is not None: + res += '%4dk' % fdict['vbr'] + if fdict.get('fps') is not None: + if res: + res += ', ' + res += '%sfps' % fdict['fps'] + if fdict.get('acodec') is not None: + if res: + res += ', ' + if fdict['acodec'] == 'none': + res += 'video only' + else: + res += '%-5s' % fdict['acodec'] + elif fdict.get('abr') is not None: + if res: + res += ', ' + res += 'audio' + if fdict.get('abr') is not None: + res += '@%3dk' % fdict['abr'] + if fdict.get('asr') is not None: + res += ' (%5dHz)' % fdict['asr'] + if fdict.get('filesize') is not None: + if res: + res += ', ' + res += format_bytes(fdict['filesize']) + elif fdict.get('filesize_approx') is not None: + if res: + res += ', ' + res += '~' + format_bytes(fdict['filesize_approx']) + return res + + def _get_formats(self, info_dict): + if info_dict.get('formats') is None: + if info_dict.get('url') and info_dict.get('_type', 'video') == 'video': + return [info_dict] + return [] + return info_dict['formats'] + + def render_formats_table(self, info_dict): + formats = self._get_formats(info_dict) + if not formats: + return + if not self.params.get('listformats_table', True) is not False: + table = [ + [ + format_field(f, 'format_id'), + format_field(f, 'ext'), + self.format_resolution(f), + self._format_note(f) + ] for f in formats if (f.get('preference') or 0) >= -1000] + return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) + + def simplified_codec(f, field): + assert field in ('acodec', 'vcodec') + codec = f.get(field) + if not codec: + return 'unknown' + elif codec != 'none': + return '.'.join(codec.split('.')[:4]) + + if field == 'vcodec' and f.get('acodec') == 'none': + return 'images' + elif field == 'acodec' and f.get('vcodec') == 'none': + return '' + return self._format_out('audio only' if field == 'vcodec' else 'video only', + self.Styles.SUPPRESS) + + delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True) + table = [ + [ + self._format_out(format_field(f, 'format_id'), self.Styles.ID), + format_field(f, 'ext'), + format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), + format_field(f, 'fps', '\t%d', func=round), + format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), + format_field(f, 'audio_channels', '\t%s'), + delim, ( + format_field(f, 'filesize', ' \t%s', func=format_bytes) + or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes) + or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * 
f['tbr'] * (1024 / 8)))), + None, self._format_out('~\t%s', self.Styles.SUPPRESS))), + format_field(f, 'tbr', '\t%dk', func=round), + shorten_protocol_name(f.get('protocol', '')), + delim, + simplified_codec(f, 'vcodec'), + format_field(f, 'vbr', '\t%dk', func=round), + simplified_codec(f, 'acodec'), + format_field(f, 'abr', '\t%dk', func=round), + format_field(f, 'asr', '\t%s', func=format_decimal_suffix), + join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty( + self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, + (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' + else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), delim=' '), + ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + header_line = self._list_format_headers( + 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO', + delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') + + return render_table( + header_line, table, hide_empty=True, + delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True)) + + def render_thumbnails_table(self, info_dict): + thumbnails = list(info_dict.get('thumbnails') or []) + if not thumbnails: + return None + return render_table( + self._list_format_headers('ID', 'Width', 'Height', 'URL'), + [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails]) + + def render_subtitles_table(self, video_id, subtitles): + def _row(lang, formats): + exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) + if len(set(names)) == 1: + names = [] if names[0] == 'unknown' else names[:1] + return [lang, ', '.join(names), ', '.join(exts)] + + if not subtitles: + return None + return render_table( + self._list_format_headers('Language', 'Name', 'Formats'), + [_row(lang, formats) for lang, formats in subtitles.items()], + hide_empty=True) + + def __list_table(self, video_id, name, func, *args): + table = func(*args) + if not table: + self.to_screen(f'{video_id} has no {name}') + return + self.to_screen(f'[info] Available {name} for {video_id}:') + self.to_stdout(table) + + def list_formats(self, info_dict): + self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict) + + def list_thumbnails(self, info_dict): + self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict) + + def list_subtitles(self, video_id, subtitles, name='subtitles'): + self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles) + + def print_debug_header(self): + if not self.params.get('verbose'): + return + + from . import _IN_CLI # Must be delayed import + + # These imports can be slow. 
So import them only as needed + from .extractor.extractors import _LAZY_LOADER + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) + + def get_encoding(stream): + ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) + additional_info = [] + if os.environ.get('TERM', '').lower() == 'dumb': + additional_info.append('dumb') + if not supports_terminal_sequences(stream): + from .utils import WINDOWS_VT_MODE # Must be imported locally + additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') + if additional_info: + ret = f'{ret} ({",".join(additional_info)})' + return ret + + encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % ( + locale.getpreferredencoding(), + sys.getfilesystemencoding(), + self.get_encoding(), + ', '.join( + f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ + if stream is not None and key != 'console') + ) + + logger = self.params.get('logger') + if logger: + write_debug = lambda msg: logger.debug(f'[debug] {msg}') + write_debug(encoding_str) + else: + write_string(f'[debug] {encoding_str}\n', encoding=None) + write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') + + source = detect_variant() + if VARIANT not in (None, 'pip'): + source += '*' + klass = type(self) + write_debug(join_nonempty( + f'{REPOSITORY.rpartition("/")[2]} version', + _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__), + f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '', + '' if source == 'unknown' else f'({source})', + '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}', + delim=' ')) + + if not _IN_CLI: + write_debug(f'params: {self.params}') + + if not _LAZY_LOADER: + if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): + write_debug('Lazy loading extractors is forcibly disabled') + else: + write_debug('Lazy loading extractors is disabled') + if self.params['compat_opts']: + write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) + + if current_git_head(): + write_debug(f'Git HEAD: {current_git_head()}') + write_debug(system_identifier()) + + exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) + ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} + if ffmpeg_features: + exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) + + exe_versions['rtmpdump'] = rtmpdump_version() + exe_versions['phantomjs'] = PhantomJSwrapper._version() + exe_str = ', '.join( + f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v + ) or 'none' + write_debug('exe versions: %s' % exe_str) + + from .compat.compat_utils import get_package_info + from .dependencies import available_dependencies + + write_debug('Optional libraries: %s' % (', '.join(sorted({ + join_nonempty(*get_package_info(m)) for m in available_dependencies.values() + })) or 'none')) + + write_debug(f'Proxy map: {self.proxies}') + write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( + klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not 
display_list: + continue + write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') + + plugin_dirs = plugin_directories() + if plugin_dirs: + write_debug(f'Plugin directories: {plugin_dirs}') + + # Not implemented + if False and self.params.get('call_home'): + ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() + write_debug('Public IP address: %s' % ipaddr) + latest_version = self.urlopen( + 'https://yt-dl.org/latest/version').read().decode() + if version_tuple(latest_version) > version_tuple(__version__): + self.report_warning( + 'You are using an outdated version (newest version: %s)! ' + 'See https://yt-dl.org/update if you need help updating.' % + latest_version) + + @functools.cached_property + def proxies(self): + """Global proxy configuration""" + opts_proxy = self.params.get('proxy') + if opts_proxy is not None: + if opts_proxy == '': + opts_proxy = '__noproxy__' + proxies = {'all': opts_proxy} + else: + proxies = urllib.request.getproxies() + # compat. Set HTTPS_PROXY to __noproxy__ to revert + if 'http' in proxies and 'https' not in proxies: + proxies['https'] = proxies['http'] + + return proxies + + @functools.cached_property + def cookiejar(self): + """Global cookiejar instance""" + return load_cookies( + self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) + + @property + def _opener(self): + """ + Get a urllib OpenerDirector from the Urllib handler (deprecated). + """ + self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()') + handler = self._request_director.handlers['Urllib'] + return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) + + def urlopen(self, req): + """ Start an HTTP download """ + if isinstance(req, str): + req = Request(req) + elif isinstance(req, urllib.request.Request): + self.deprecation_warning( + 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. ' + 'Use yt_dlp.networking.common.Request instead.') + req = urllib_req_to_req(req) + assert isinstance(req, Request) + + # compat: Assume user:pass url params are basic auth + url, basic_auth_header = extract_basic_auth(req.url) + if basic_auth_header: + req.headers['Authorization'] = basic_auth_header + req.url = sanitize_url(url) + + clean_proxies(proxies=req.proxies, headers=req.headers) + clean_headers(req.headers) + + try: + return self._request_director.send(req) + except NoSupportingHandlers as e: + for ue in e.unsupported_errors: + # FIXME: This depends on the order of errors. + if not (ue.handler and ue.msg): + continue + if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower(): + raise RequestError( + 'file:// URLs are disabled by default in yt-dlp for security reasons. ' + 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue + if 'unsupported proxy type: "https"' in ue.msg.lower(): + raise RequestError( + 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests') + + elif ( + re.match(r'unsupported url scheme: "wss?"', ue.msg.lower()) + and 'websockets' not in self._request_director.handlers + ): + raise RequestError( + 'This request requires WebSocket support. 
' + 'Ensure one of the following dependencies are installed: websockets', + cause=ue) from ue + raise + except SSLError as e: + if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e): + raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e + elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e): + raise RequestError( + 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. ' + 'Try using --legacy-server-connect', cause=e) from e + raise + + def build_request_director(self, handlers, preferences=None): + logger = _YDLLogger(self) + headers = self.params['http_headers'].copy() + proxies = self.proxies.copy() + clean_headers(headers) + clean_proxies(proxies, headers) + + director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic')) + for handler in handlers: + director.add_handler(handler( + logger=logger, + headers=headers, + cookiejar=self.cookiejar, + proxies=proxies, + prefer_system_certs='no-certifi' in self.params['compat_opts'], + verify=not self.params.get('nocheckcertificate'), + **traverse_obj(self.params, { + 'verbose': 'debug_printtraffic', + 'source_address': 'source_address', + 'timeout': 'socket_timeout', + 'legacy_ssl_support': 'legacyserverconnect', + 'enable_file_urls': 'enable_file_urls', + 'client_cert': { + 'client_certificate': 'client_certificate', + 'client_certificate_key': 'client_certificate_key', + 'client_certificate_password': 'client_certificate_password', + }, + }), + )) + director.preferences.update(preferences or []) + if 'prefer-legacy-http-handler' in self.params['compat_opts']: + director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0) + return director + + @functools.cached_property + def _request_director(self): + return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES) + + def encode(self, s): + if isinstance(s, bytes): + return s # Already encoded + + try: + return s.encode(self.get_encoding()) + except UnicodeEncodeError as err: + err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' 
+ raise + + def get_encoding(self): + encoding = self.params.get('encoding') + if encoding is None: + encoding = preferredencoding() + return encoding + + def _write_info_json(self, label, ie_result, infofn, overwrite=None): + ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error ''' + if overwrite is None: + overwrite = self.params.get('overwrites', True) + if not self.params.get('writeinfojson'): + return False + elif not infofn: + self.write_debug(f'Skipping writing {label} infojson') + return False + elif not self._ensure_dir_exists(infofn): + return None + elif not overwrite and os.path.exists(infofn): + self.to_screen(f'[info] {label.title()} metadata is already present') + return 'exists' + + self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') + try: + write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) + return True + except OSError: + self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') + return None + + def _write_description(self, label, ie_result, descfn): + ''' Write description and returns True = written, False = skip, None = error ''' + if not self.params.get('writedescription'): + return False + elif not descfn: + self.write_debug(f'Skipping writing {label} description') + return False + elif not self._ensure_dir_exists(descfn): + return None + elif not self.params.get('overwrites', True) and os.path.exists(descfn): + self.to_screen(f'[info] {label.title()} description is already present') + elif ie_result.get('description') is None: + self.to_screen(f'[info] There\'s no {label} description to write') + return False + else: + try: + self.to_screen(f'[info] Writing {label} description to: {descfn}') + with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + descfile.write(ie_result['description']) + except OSError: + self.report_error(f'Cannot write {label} description file {descfn}') + return None + return True + + def _write_subtitles(self, info_dict, filename): + ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + ret = [] + subtitles = info_dict.get('requested_subtitles') + if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + # subtitles download errors are already managed as troubles in relevant IE + # that way it will silently go on when used with unsupporting IE + return ret + elif not subtitles: + self.to_screen('[info] There are no subtitles for the requested languages') + return ret + sub_filename_base = self.prepare_filename(info_dict, 'subtitle') + if not sub_filename_base: + self.to_screen('[info] Skipping writing video subtitles') + return ret + + for sub_lang, sub_info in subtitles.items(): + sub_format = sub_info['ext'] + sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) + sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) + existing_sub = self.existing_file((sub_filename_final, sub_filename)) + if existing_sub: + self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') + sub_info['filepath'] = existing_sub + ret.append((existing_sub, sub_filename_final)) + continue + + self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') + if sub_info.get('data') is not None: + try: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/ytdl-org/youtube-dl/issues/10268 + with 
open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: + subfile.write(sub_info['data']) + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + continue + except OSError: + self.report_error(f'Cannot write video subtitles file {sub_filename}') + return None + + try: + sub_copy = sub_info.copy() + sub_copy.setdefault('http_headers', info_dict.get('http_headers')) + self.dl(sub_filename, sub_copy, subtitle=True) + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' + if self.params.get('ignoreerrors') is not True: # False or 'only_download' + if not self.params.get('ignoreerrors'): + self.report_error(msg) + raise DownloadError(msg) + self.report_warning(msg) + return ret + + def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): + ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error ''' + write_all = self.params.get('write_all_thumbnails', False) + thumbnails, ret = [], [] + if write_all or self.params.get('writethumbnail', False): + thumbnails = info_dict.get('thumbnails') or [] + if not thumbnails: + self.to_screen(f'[info] There are no {label} thumbnails to download') + return ret + multiple = write_all and len(thumbnails) > 1 + + if thumb_filename_base is None: + thumb_filename_base = filename + if thumbnails and not thumb_filename_base: + self.write_debug(f'Skipping writing {label} thumbnail') + return ret + + if thumbnails and not self._ensure_dir_exists(filename): + return None + + for idx, t in list(enumerate(thumbnails))[::-1]: + thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') + thumb_display_id = f'{label} thumbnail {t["id"]}' + thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) + thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) + + existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) + if existing_thumb: + self.to_screen('[info] %s is already present' % ( + thumb_display_id if multiple else f'{label} thumbnail').capitalize()) + t['filepath'] = existing_thumb + ret.append((existing_thumb, thumb_filename_final)) + else: + self.to_screen(f'[info] Downloading {thumb_display_id} ...') + try: + uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) + self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') + with open(encodeFilename(thumb_filename), 'wb') as thumbf: + shutil.copyfileobj(uf, thumbf) + ret.append((thumb_filename, thumb_filename_final)) + t['filepath'] = thumb_filename + except network_exceptions as err: + if isinstance(err, HTTPError) and err.status == 404: + self.to_screen(f'[info] {thumb_display_id.title()} does not exist') + else: + self.report_warning(f'Unable to download {thumb_display_id}: {err}') + thumbnails.pop(idx) + if ret and not write_all: + break + return ret diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py new file mode 100644 index 0000000..aeea262 --- /dev/null +++ b/yt_dlp/__init__.py @@ -0,0 +1,1054 @@ +import sys + +if sys.version_info < (3, 8): + raise ImportError( + f'You are using an unsupported version of Python. 
Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541 + +__license__ = 'The Unlicense' + +import collections +import getpass +import itertools +import optparse +import os +import re +import traceback + +from .compat import compat_os_name, compat_shlex_quote +from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .downloader.external import get_external_downloader +from .extractor import list_extractor_classes +from .extractor.adobepass import MSO_INFO +from .options import parseOpts +from .postprocessor import ( + FFmpegExtractAudioPP, + FFmpegMergerPP, + FFmpegPostProcessor, + FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, + FFmpegVideoConvertorPP, + FFmpegVideoRemuxerPP, + MetadataFromFieldPP, + MetadataParserPP, +) +from .update import Updater +from .utils import ( + NO_DEFAULT, + POSTPROCESS_WHEN, + DateRange, + DownloadCancelled, + DownloadError, + FormatSorter, + GeoUtils, + PlaylistEntries, + SameFileError, + decodeOption, + download_range_func, + expand_path, + float_or_none, + format_field, + int_or_none, + match_filter_func, + parse_bytes, + parse_duration, + preferredencoding, + read_batch_urls, + read_stdin, + render_table, + setproctitle, + traverse_obj, + variadic, + write_string, +) +from .utils.networking import std_headers +from .YoutubeDL import YoutubeDL + +_IN_CLI = False + + +def _exit(status=0, *args): + for msg in args: + sys.stderr.write(msg) + raise SystemExit(status) + + +def get_urls(urls, batchfile, verbose): + """ + @param verbose -1: quiet, 0: normal, 1: verbose + """ + batch_urls = [] + if batchfile is not None: + try: + batch_urls = read_batch_urls( + read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-' + else open(expand_path(batchfile), encoding='utf-8', errors='ignore')) + if verbose == 1: + write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') + except OSError: + _exit(f'ERROR: batch file {batchfile} could not be read') + _enc = preferredencoding() + return [ + url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip() + for url in batch_urls + urls] + + +def print_extractor_information(opts, urls): + out = '' + if opts.list_extractors: + # Importing GenericIE is currently slow since it imports YoutubeIE + from .extractor.generic import GenericIE + + urls = dict.fromkeys(urls, False) + for ie in list_extractor_classes(opts.age_limit): + out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' + if ie == GenericIE: + matched_urls = [url for url, matched in urls.items() if not matched] + else: + matched_urls = tuple(filter(ie.suitable, urls.keys())) + urls.update(dict.fromkeys(matched_urls, True)) + out += ''.join(f' {url}\n' for url in matched_urls) + elif opts.list_extractor_descriptions: + _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') + out = '\n'.join( + ie.description(markdown=False, search_examples=_SEARCHES) + for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) + elif opts.ap_list_mso: + out = 'Supported TV Providers:\n%s\n' % render_table( + ['mso', 'mso name'], + [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) + else: + return False + write_string(out, out=sys.stdout) + return True + + +def set_compat_opts(opts): + def _unused_compat_opt(name): + if name not in opts.compat_opts: + return False + opts.compat_opts.discard(name) + opts.compat_opts.update(['*%s' % name]) + 
return True + + def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): + attr = getattr(opts, opt_name) + if compat_name in opts.compat_opts: + if attr is None: + setattr(opts, opt_name, not default) + return True + else: + if remove_compat: + _unused_compat_opt(compat_name) + return False + elif attr is None: + setattr(opts, opt_name, default) + return None + + set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') + set_default_compat('no-playlist-metafiles', 'allow_playlist_files') + set_default_compat('no-clean-infojson', 'clean_infojson') + if 'no-attach-info-json' in opts.compat_opts: + if opts.embed_infojson: + _unused_compat_opt('no-attach-info-json') + else: + opts.embed_infojson = False + if 'format-sort' in opts.compat_opts: + opts.format_sort.extend(FormatSorter.ytdl_default) + _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) + _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) + if _video_multistreams_set is False and _audio_multistreams_set is False: + _unused_compat_opt('multistreams') + if 'filename' in opts.compat_opts: + if opts.outtmpl.get('default') is None: + opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'}) + else: + _unused_compat_opt('filename') + + +def validate_options(opts): + def validate(cndn, name, value=None, msg=None): + if cndn: + return True + raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value)) + + def validate_in(name, value, items, msg=None): + return validate(value is None or value in items, name, value, msg) + + def validate_regex(name, value, regex): + return validate(value is None or re.match(regex, value), name, value) + + def validate_positive(name, value, strict=False): + return validate(value is None or value > 0 or (not strict and value == 0), + name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0')) + + def validate_minmax(min_val, max_val, min_name, max_name=None): + if max_val is None or min_val is None or max_val >= min_val: + return + if not max_name: + min_name, max_name = f'min {min_name}', f'max {min_name}' + raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"') + + # Usernames and passwords + validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc', + msg='{name}, netrc command and username/password are mutually exclusive options') + validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing') + validate(opts.ap_password is None or opts.ap_username is not None, + 'TV Provider account username', msg='{name} missing') + validate_in('TV Provider', opts.ap_mso, MSO_INFO, + 'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers') + + # Numbers + validate_positive('autonumber start', opts.autonumber_start) + validate_positive('autonumber size', opts.autonumber_size, True) + validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True) + validate_positive('playlist start', opts.playliststart, True) + if opts.playlistend != -1: + validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end') + + # Time ranges + validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles) + validate_positive('requests sleep interval', opts.sleep_interval_requests) + validate_positive('sleep
interval', opts.sleep_interval) + validate_positive('max sleep interval', opts.max_sleep_interval) + if opts.sleep_interval is None: + validate( + opts.max_sleep_interval is None, 'min sleep interval', + msg='{name} must be specified; use --min-sleep-interval') + elif opts.max_sleep_interval is None: + opts.max_sleep_interval = opts.sleep_interval + else: + validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') + + if opts.wait_for_video is not None: + min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), + 'time range to wait for video', opts.wait_for_video) + validate_minmax(min_wait, max_wait, 'time range to wait for video') + opts.wait_for_video = (min_wait, max_wait) + + # Format sort + for f in opts.format_sort: + validate_regex('format sorting', f, FormatSorter.regex) + + # Postprocessor formats + validate_regex('merge output format', opts.merge_output_format, + r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS)))) + validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) + validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS) + validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE) + validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE) + validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE) + if opts.audioquality: + opts.audioquality = opts.audioquality.strip('k').strip('K') + # int_or_none prevents inf, nan + validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0)) + + # Retries + def parse_retries(name, value): + if value is None: + return None + elif value in ('inf', 'infinite'): + return float('inf') + try: + return int(value) + except (TypeError, ValueError): + validate(False, f'{name} retry count', value) + + opts.retries = parse_retries('download', opts.retries) + opts.fragment_retries = parse_retries('fragment', opts.fragment_retries) + opts.extractor_retries = parse_retries('extractor', opts.extractor_retries) + opts.file_access_retries = parse_retries('file access', opts.file_access_retries) + + # Retry sleep function + def parse_sleep_func(expr): + NUMBER_RE = r'\d+(?:\.\d+)?' 
+ op, start, limit, step, *_ = tuple(re.fullmatch( + rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', + expr.strip()).groups()) + (None, None) + + if op == 'exp': + return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) + else: + default_step = start if op or limit else 0 + return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf')) + + for key, expr in opts.retry_sleep.items(): + if not expr: + del opts.retry_sleep[key] + continue + try: + opts.retry_sleep[key] = parse_sleep_func(expr) + except AttributeError: + raise ValueError(f'invalid {key} retry sleep expression {expr!r}') + + # Bytes + def validate_bytes(name, value): + if value is None: + return None + numeric_limit = parse_bytes(value) + validate(numeric_limit is not None, 'rate limit', value) + return numeric_limit + + opts.ratelimit = validate_bytes('rate limit', opts.ratelimit) + opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit) + opts.min_filesize = validate_bytes('min filesize', opts.min_filesize) + opts.max_filesize = validate_bytes('max filesize', opts.max_filesize) + opts.buffersize = validate_bytes('buffer size', opts.buffersize) + opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size) + + # Output templates + def validate_outtmpl(tmpl, msg): + err = YoutubeDL.validate_outtmpl(tmpl) + if err: + raise ValueError(f'invalid {msg} "{tmpl}": {err}') + + for k, tmpl in opts.outtmpl.items(): + validate_outtmpl(tmpl, f'{k} output template') + for type_, tmpl_list in opts.forceprint.items(): + for tmpl in tmpl_list: + validate_outtmpl(tmpl, f'{type_} print template') + for type_, tmpl_list in opts.print_to_file.items(): + for tmpl, file in tmpl_list: + validate_outtmpl(tmpl, f'{type_} print to file template') + validate_outtmpl(file, f'{type_} print to file filename') + validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') + for k, tmpl in opts.progress_template.items(): + k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' + validate_outtmpl(tmpl, f'{k} template') + + outtmpl_default = opts.outtmpl.get('default') + if outtmpl_default == '': + opts.skip_download = None + del opts.outtmpl['default'] + + def parse_chapters(name, value, advanced=False): + parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x) + TIMESTAMP_RE = r'''(?x)(?: + (?P<start_sign>-?)(?P<start>[^-]+) + )?\s*-\s*(?: + (?P<end_sign>-?)(?P<end>[^-]+) + )?''' + + chapters, ranges, from_url = [], [], False + for regex in value or []: + if advanced and regex == '*from-url': + from_url = True + continue + elif not regex.startswith('*'): + try: + chapters.append(re.compile(regex)) + except re.error as err: + raise ValueError(f'invalid {name} regex "{regex}" - {err}') + continue + + for range_ in map(str.strip, regex[1:].split(',')): + mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_) + dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')] + signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign')) + + err = None + if None in (dur or [None]): + err = 'Must be of the form "*start-end"' + elif not advanced and any(signs): + err = 'Negative timestamps are not allowed' + else: + dur[0] *= -1 if signs[0] else 1 + dur[1] *= -1 if signs[1] else 1 + if dur[1] == float('-inf'): + err = '"-inf" is not a valid end' + if err: + raise ValueError(f'invalid {name} time range "{regex}".
{err}') + ranges.append(dur) + + return chapters, ranges, from_url + + opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters) + opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True)) + + # Cookies from browser + if opts.cookiesfrombrowser: + container = None + mobj = re.fullmatch(r'''(?x) + (?P<name>[^+:]+) + (?:\s*\+\s*(?P<keyring>[^:]+))? + (?:\s*:\s*(?!:)(?P<profile>.+?))? + (?:\s*::\s*(?P<container>.+))? + ''', opts.cookiesfrombrowser) + if mobj is None: + raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') + browser_name, keyring, profile, container = mobj.group('name', 'keyring', 'profile', 'container') + browser_name = browser_name.lower() + if browser_name not in SUPPORTED_BROWSERS: + raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". ' + f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') + if keyring is not None: + keyring = keyring.upper() + if keyring not in SUPPORTED_KEYRINGS: + raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". ' + f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') + opts.cookiesfrombrowser = (browser_name, profile, keyring, container) + + # MetadataParser + def metadataparser_actions(f): + if isinstance(f, str): + cmd = '--parse-metadata %s' % compat_shlex_quote(f) + try: + actions = [MetadataFromFieldPP.to_action(f)] + except Exception as err: + raise ValueError(f'{cmd} is invalid; {err}') + else: + cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) + actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) + + for action in actions: + try: + MetadataParserPP.validate_action(*action) + except Exception as err: + raise ValueError(f'{cmd} is invalid; {err}') + yield action + + if opts.metafromtitle is not None: + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } + + # Other options + if opts.playlist_items is not None: + try: + tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) + except Exception as err: + raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}') + + opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None + if opts.geo_bypass.lower() not in ('default', 'never'): + try: + GeoUtils.random_ipv4(opts.geo_bypass) + except Exception: + raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"') + if len(opts.geo_bypass) == 2: + opts.geo_bypass_country = opts.geo_bypass + else: + opts.geo_bypass_ip_block = opts.geo_bypass + opts.geo_bypass = opts.geo_bypass.lower() != 'never' + + opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter) + + if opts.download_archive is not None: + opts.download_archive = expand_path(opts.download_archive) + + if opts.ffmpeg_location is not None: + opts.ffmpeg_location = expand_path(opts.ffmpeg_location) + + if opts.user_agent is not None: + opts.headers.setdefault('User-Agent', opts.user_agent) + if opts.referer is not None: + opts.headers.setdefault('Referer', opts.referer) + + if opts.no_sponsorblock: + opts.sponsorblock_mark = opts.sponsorblock_remove = set() + + default_downloader = None + for proto, path in opts.external_downloader.items(): + if path == 'native': + continue + ed = get_external_downloader(path) + if ed is None: + raise ValueError(
+ f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"') + elif ed and proto == 'default': + default_downloader = ed.get_basename() + + for policy in opts.color.values(): + if policy not in ('always', 'auto', 'no_color', 'never'): + raise ValueError(f'"{policy}" is not a valid color policy') + + warnings, deprecation_warnings = [], [] + + # Common mistake: -f best + if opts.format == 'best': + warnings.append('.\n '.join(( + '"-f best" selects the best pre-merged format which is often not the best option', + 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', + 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) + + # --(postprocessor/downloader)-args without name + def report_args_compat(name, value, key1, key2=None, where=None): + if key1 in value and key2 not in value: + warnings.append(f'{name.title()} arguments given without specifying name. ' + f'The arguments will be given to {where or f"all {name}s"}') + return True + return False + + if report_args_compat('external downloader', opts.external_downloader_args, + 'default', where=default_downloader) and default_downloader: + # Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1 + opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default')) + + if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): + opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') + opts.postprocessor_args.setdefault('sponskrub', []) + + def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', + val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): + if val2 is NO_DEFAULT: + val2 = getattr(opts, opt2) + if not val2: + return + + if val1 is NO_DEFAULT: + val1 = getattr(opts, opt1) + if val1: + warnings.append(f'{arg1} is ignored since {arg2} was given') + setattr(opts, opt1, default) + + # Conflicting options + report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random') + report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist') + report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist') + report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) + report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) + report_conflict('--exec-before-download', 'exec_before_dl_cmd', + '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) + report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) + report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') + report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') + report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', + val1=opts.sponskrub and opts.sponskrub_cut) + + # Conflicts with --allow-unplayable-formats + report_conflict('--embed-metadata', 'addmetadata') + report_conflict('--embed-chapters', 'addchapters') + report_conflict('--embed-info-json', 'embed_infojson') + 
report_conflict('--embed-subs', 'embedsubtitles') + report_conflict('--embed-thumbnail', 'embedthumbnail') + report_conflict('--extract-audio', 'extractaudio') + report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never') + report_conflict('--recode-video', 'recodevideo') + report_conflict('--remove-chapters', 'remove_chapters', default=[]) + report_conflict('--remux-video', 'remuxvideo') + report_conflict('--sponskrub', 'sponskrub') + report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) + report_conflict('--xattrs', 'xattrs') + + # Fully deprecated options + def report_deprecation(val, old, new=None): + if not val: + return + deprecation_warnings.append( + f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new + else f'{old} is deprecated and may not work as expected') + + report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') + report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') + # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future + # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future + # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + + # Dependent options + opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore) + + if opts.exec_before_dl_cmd: + opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd + + if opts.useid: # --id is not deprecated in youtube-dl + opts.outtmpl['default'] = '%(id)s.%(ext)s' + + if opts.overwrites: # --force-overwrites implies --no-continue + opts.continue_dl = False + + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + # Add chapters when adding metadata or marking sponsors + opts.addchapters = True + + if opts.extractaudio and not opts.keepvideo and opts.format is None: + # Do not unnecessarily download audio + opts.format = 'bestaudio/best' + + if opts.getcomments and opts.writeinfojson is None and not opts.embed_infojson: + # If JSON is not printed anywhere, but comments are requested, save it to file + if not opts.dumpjson or opts.print_json or opts.dump_single_json: + opts.writeinfojson = True + + if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub): + # --all-sub automatically sets --write-sub if --write-auto-sub is not given + opts.writesubtitles = True + + if opts.addmetadata and opts.embed_infojson is None: + # If embedding metadata and infojson is present, embed it + opts.embed_infojson = 'if_exists' + + # Ask for passwords + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ') + + return warnings, deprecation_warnings + + +def get_postprocessors(opts): + yield from opts.add_postprocessors + + for when, actions in opts.parse_metadata.items(): + yield { + 'key': 'MetadataParser', + 'actions': actions, + 'when': when + } + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + if sponsorblock_query: + yield { + 'key': 'SponsorBlock', + 'categories': sponsorblock_query, + 'api': opts.sponsorblock_api, + 'when': 'after_filter' + } + if opts.convertsubtitles: + yield { + 'key': 'FFmpegSubtitlesConvertor', + 
'format': opts.convertsubtitles, + 'when': 'before_dl' + } + if opts.convertthumbnails: + yield { + 'key': 'FFmpegThumbnailsConvertor', + 'format': opts.convertthumbnails, + 'when': 'before_dl' + } + if opts.extractaudio: + yield { + 'key': 'FFmpegExtractAudio', + 'preferredcodec': opts.audioformat, + 'preferredquality': opts.audioquality, + 'nopostoverwrites': opts.nopostoverwrites, + } + if opts.remuxvideo: + yield { + 'key': 'FFmpegVideoRemuxer', + 'preferedformat': opts.remuxvideo, + } + if opts.recodevideo: + yield { + 'key': 'FFmpegVideoConvertor', + 'preferedformat': opts.recodevideo, + } + # If ModifyChapters is going to remove chapters, subtitles must already be in the container. + if opts.embedsubtitles: + keep_subs = 'no-keep-subs' not in opts.compat_opts + yield { + 'key': 'FFmpegEmbedSubtitle', + # already_have_subtitle = True prevents the file from being deleted after embedding + 'already_have_subtitle': opts.writesubtitles and keep_subs + } + if not opts.writeautomaticsub and keep_subs: + opts.writesubtitles = True + + # ModifyChapters must run before FFmpegMetadataPP + if opts.remove_chapters or sponsorblock_query: + yield { + 'key': 'ModifyChapters', + 'remove_chapters_patterns': opts.remove_chapters, + 'remove_sponsor_segments': opts.sponsorblock_remove, + 'remove_ranges': opts.remove_ranges, + 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, + 'force_keyframes': opts.force_keyframes_at_cuts + } + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) + # By default ffmpeg preserves metadata applicable for both + # source and target containers. From this point the container won't change, + # so metadata can be added here. 
+ if opts.addmetadata or opts.addchapters or opts.embed_infojson: + yield { + 'key': 'FFmpegMetadata', + 'add_chapters': opts.addchapters, + 'add_metadata': opts.addmetadata, + 'add_infojson': opts.embed_infojson, + } + # Deprecated + # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment + # but must be below EmbedSubtitle and FFmpegMetadata + # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 + # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found + if opts.sponskrub is not False: + yield { + 'key': 'SponSkrub', + 'path': opts.sponskrub_path, + 'args': opts.sponskrub_args, + 'cut': opts.sponskrub_cut, + 'force': opts.sponskrub_force, + 'ignoreerror': opts.sponskrub is None, + '_from_cli': True, + } + if opts.embedthumbnail: + yield { + 'key': 'EmbedThumbnail', + # already_have_thumbnail = True prevents the file from being deleted after embedding + 'already_have_thumbnail': opts.writethumbnail + } + if not opts.writethumbnail: + opts.writethumbnail = True + opts.outtmpl['pl_thumbnail'] = '' + if opts.split_chapters: + yield { + 'key': 'FFmpegSplitChapters', + 'force_keyframes': opts.force_keyframes_at_cuts, + } + # XAttrMetadataPP should be run after post-processors that may change file contents + if opts.xattrs: + yield {'key': 'XAttrMetadata'} + if opts.concat_playlist != 'never': + yield { + 'key': 'FFmpegConcat', + 'only_multi_video': opts.concat_playlist != 'always', + 'when': 'playlist', + } + # Exec must be the last PP of each category + for when, exec_cmd in opts.exec_cmd.items(): + yield { + 'key': 'Exec', + 'exec_cmd': exec_cmd, + 'when': when, + } + + +ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'urls', 'ydl_opts')) + + +def parse_options(argv=None): + """@returns ParsedOptions(parser, opts, urls, ydl_opts)""" + parser, opts, urls = parseOpts(argv) + urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose) + + set_compat_opts(opts) + try: + warnings, deprecation_warnings = validate_options(opts) + except ValueError as err: + parser.error(f'{err}\n') + + postprocessors = list(get_postprocessors(opts)) + + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) + any_getting = any(getattr(opts, k) for k in ( + 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', + 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' + )) + if opts.quiet is None: + opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) + + playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist'] + write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson + and opts.allow_playlist_files and opts.outtmpl.get('pl_infojson') != '') + if not any(( + opts.extract_flat, + opts.dump_single_json, + opts.forceprint.get('playlist'), + opts.print_to_file.get('playlist'), + write_playlist_infojson, + )): + if not playlist_pps: + opts.extract_flat = 'discard' + elif playlist_pps == [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}]: + opts.extract_flat = 'discard_in_playlist' + + final_ext = ( + opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS + else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS + else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS) + else None) + + return 
ParsedOptions(parser, opts, urls, { + 'usenetrc': opts.usenetrc, + 'netrc_location': opts.netrc_location, + 'netrc_cmd': opts.netrc_cmd, + 'username': opts.username, + 'password': opts.password, + 'twofactor': opts.twofactor, + 'videopassword': opts.videopassword, + 'ap_mso': opts.ap_mso, + 'ap_username': opts.ap_username, + 'ap_password': opts.ap_password, + 'client_certificate': opts.client_certificate, + 'client_certificate_key': opts.client_certificate_key, + 'client_certificate_password': opts.client_certificate_password, + 'quiet': opts.quiet, + 'no_warnings': opts.no_warnings, + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'forceid': opts.getid, + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'forceduration': opts.getduration, + 'forcefilename': opts.getfilename, + 'forceformat': opts.getformat, + 'forceprint': opts.forceprint, + 'print_to_file': opts.print_to_file, + 'forcejson': opts.dumpjson or opts.print_json, + 'dump_single_json': opts.dump_single_json, + 'force_write_download_archive': opts.force_write_download_archive, + 'simulate': (print_only or any_getting or None) if opts.simulate is None else opts.simulate, + 'skip_download': opts.skip_download, + 'format': opts.format, + 'allow_unplayable_formats': opts.allow_unplayable_formats, + 'ignore_no_formats_error': opts.ignore_no_formats_error, + 'format_sort': opts.format_sort, + 'format_sort_force': opts.format_sort_force, + 'allow_multiple_video_streams': opts.allow_multiple_video_streams, + 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams, + 'check_formats': opts.check_formats, + 'listformats': opts.listformats, + 'listformats_table': opts.listformats_table, + 'outtmpl': opts.outtmpl, + 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder, + 'paths': opts.paths, + 'autonumber_size': opts.autonumber_size, + 'autonumber_start': opts.autonumber_start, + 'restrictfilenames': opts.restrictfilenames, + 'windowsfilenames': opts.windowsfilenames, + 'ignoreerrors': opts.ignoreerrors, + 'force_generic_extractor': opts.force_generic_extractor, + 'allowed_extractors': opts.allowed_extractors or ['default'], + 'ratelimit': opts.ratelimit, + 'throttledratelimit': opts.throttledratelimit, + 'overwrites': opts.overwrites, + 'retries': opts.retries, + 'file_access_retries': opts.file_access_retries, + 'fragment_retries': opts.fragment_retries, + 'extractor_retries': opts.extractor_retries, + 'retry_sleep_functions': opts.retry_sleep, + 'skip_unavailable_fragments': opts.skip_unavailable_fragments, + 'keep_fragments': opts.keep_fragments, + 'concurrent_fragment_downloads': opts.concurrent_fragment_downloads, + 'buffersize': opts.buffersize, + 'noresizebuffer': opts.noresizebuffer, + 'http_chunk_size': opts.http_chunk_size, + 'continuedl': opts.continue_dl, + 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, + 'progress_with_newline': opts.progress_with_newline, + 'progress_template': opts.progress_template, + 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, + 'playlistreverse': opts.playlist_reverse, + 'playlistrandom': opts.playlist_random, + 'lazy_playlist': opts.lazy_playlist, + 'noplaylist': opts.noplaylist, + 'logtostderr': opts.outtmpl.get('default') == '-', + 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, + 'updatetime': opts.updatetime, + 'writedescription': opts.writedescription, + 'writeannotations': opts.writeannotations, + 'writeinfojson': opts.writeinfojson, + 'allow_playlist_files': 
opts.allow_playlist_files, + 'clean_infojson': opts.clean_infojson, + 'getcomments': opts.getcomments, + 'writethumbnail': opts.writethumbnail is True, + 'write_all_thumbnails': opts.writethumbnail == 'all', + 'writelink': opts.writelink, + 'writeurllink': opts.writeurllink, + 'writewebloclink': opts.writewebloclink, + 'writedesktoplink': opts.writedesktoplink, + 'writesubtitles': opts.writesubtitles, + 'writeautomaticsub': opts.writeautomaticsub, + 'allsubtitles': opts.allsubtitles, + 'listsubtitles': opts.listsubtitles, + 'subtitlesformat': opts.subtitlesformat, + 'subtitleslangs': opts.subtitleslangs, + 'matchtitle': decodeOption(opts.matchtitle), + 'rejecttitle': decodeOption(opts.rejecttitle), + 'max_downloads': opts.max_downloads, + 'prefer_free_formats': opts.prefer_free_formats, + 'trim_file_name': opts.trim_file_name, + 'verbose': opts.verbose, + 'dump_intermediate_pages': opts.dump_intermediate_pages, + 'write_pages': opts.write_pages, + 'load_pages': opts.load_pages, + 'test': opts.test, + 'keepvideo': opts.keepvideo, + 'min_filesize': opts.min_filesize, + 'max_filesize': opts.max_filesize, + 'min_views': opts.min_views, + 'max_views': opts.max_views, + 'daterange': opts.date, + 'cachedir': opts.cachedir, + 'youtube_print_sig_code': opts.youtube_print_sig_code, + 'age_limit': opts.age_limit, + 'download_archive': opts.download_archive, + 'break_on_existing': opts.break_on_existing, + 'break_on_reject': opts.break_on_reject, + 'break_per_url': opts.break_per_url, + 'skip_playlist_after_errors': opts.skip_playlist_after_errors, + 'cookiefile': opts.cookiefile, + 'cookiesfrombrowser': opts.cookiesfrombrowser, + 'legacyserverconnect': opts.legacy_server_connect, + 'nocheckcertificate': opts.no_check_certificate, + 'prefer_insecure': opts.prefer_insecure, + 'enable_file_urls': opts.enable_file_urls, + 'http_headers': opts.headers, + 'proxy': opts.proxy, + 'socket_timeout': opts.socket_timeout, + 'bidi_workaround': opts.bidi_workaround, + 'debug_printtraffic': opts.debug_printtraffic, + 'prefer_ffmpeg': opts.prefer_ffmpeg, + 'include_ads': opts.include_ads, + 'default_search': opts.default_search, + 'dynamic_mpd': opts.dynamic_mpd, + 'extractor_args': opts.extractor_args, + 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, + 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, + 'encoding': opts.encoding, + 'extract_flat': opts.extract_flat, + 'live_from_start': opts.live_from_start, + 'wait_for_video': opts.wait_for_video, + 'mark_watched': opts.mark_watched, + 'merge_output_format': opts.merge_output_format, + 'final_ext': final_ext, + 'postprocessors': postprocessors, + 'fixup': opts.fixup, + 'source_address': opts.source_address, + 'call_home': opts.call_home, + 'sleep_interval_requests': opts.sleep_interval_requests, + 'sleep_interval': opts.sleep_interval, + 'max_sleep_interval': opts.max_sleep_interval, + 'sleep_interval_subtitles': opts.sleep_interval_subtitles, + 'external_downloader': opts.external_downloader, + 'download_ranges': opts.download_ranges, + 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts, + 'list_thumbnails': opts.list_thumbnails, + 'playlist_items': opts.playlist_items, + 'xattr_set_filesize': opts.xattr_set_filesize, + 'match_filter': opts.match_filter, + 'color': opts.color, + 'ffmpeg_location': opts.ffmpeg_location, + 'hls_prefer_native': opts.hls_prefer_native, + 'hls_use_mpegts': opts.hls_use_mpegts, + 'hls_split_discontinuity': opts.hls_split_discontinuity, + 'external_downloader_args': 
opts.external_downloader_args, + 'postprocessor_args': opts.postprocessor_args, + 'cn_verification_proxy': opts.cn_verification_proxy, + 'geo_verification_proxy': opts.geo_verification_proxy, + 'geo_bypass': opts.geo_bypass, + 'geo_bypass_country': opts.geo_bypass_country, + 'geo_bypass_ip_block': opts.geo_bypass_ip_block, + '_warnings': warnings, + '_deprecation_warnings': deprecation_warnings, + 'compat_opts': opts.compat_opts, + }) + + +def _real_main(argv=None): + setproctitle('yt-dlp') + + parser, opts, all_urls, ydl_opts = parse_options(argv) + + # Dump user agent + if opts.dump_user_agent: + ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) + write_string(f'{ua}\n', out=sys.stdout) + return + + if print_extractor_information(opts, all_urls): + return + + # We may need ffmpeg_location without having access to the YoutubeDL instance + # See https://github.com/yt-dlp/yt-dlp/issues/2191 + if opts.ffmpeg_location: + FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location) + + with YoutubeDL(ydl_opts) as ydl: + pre_process = opts.update_self or opts.rm_cachedir + actual_use = all_urls or opts.load_info_filename + + if opts.rm_cachedir: + ydl.cache.remove() + + try: + updater = Updater(ydl, opts.update_self) + if opts.update_self and updater.update() and actual_use: + if updater.cmd: + return updater.restart() + # This code is reachable only for zip variant in py < 3.10 + # It makes sense to exit here, but the old behavior is to continue + ydl.report_warning('Restart yt-dlp to use the updated version') + # return 100, 'ERROR: The program must exit for the update to complete' + except Exception: + traceback.print_exc() + ydl._download_retcode = 100 + + if not actual_use: + if pre_process: + return ydl._download_retcode + + args = sys.argv[1:] if argv is None else argv + ydl.warn_if_short_id(args) + + # Show a useful error message and wait for keypress if not launched from shell on Windows + if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): + import ctypes.wintypes + import msvcrt + + kernel32 = ctypes.WinDLL('Kernel32') + + buffer = (1 * ctypes.wintypes.DWORD)() + attached_processes = kernel32.GetConsoleProcessList(buffer, 1) + # If we only have a single process attached, then the executable was double clicked + # When using `pyinstaller` with `--onefile`, two processes get attached + is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI') + if attached_processes == 1 or is_onefile and attached_processes == 2: + print(parser._generate_error_message( + 'Do not double-click the executable, instead call it from a command line.\n' + 'Please read the README for further information on how to use yt-dlp: ' + 'https://github.com/yt-dlp/yt-dlp#readme')) + msvcrt.getch() + _exit(2) + parser.error( + 'You must provide at least one URL.\n' + 'Type yt-dlp --help to see a list of all options.') + + parser.destroy() + try: + if opts.load_info_filename is not None: + if all_urls: + ydl.report_warning('URLs are ignored due to --load-info-json') + return ydl.download_with_info_file(expand_path(opts.load_info_filename)) + else: + return ydl.download(all_urls) + except DownloadCancelled: + ydl.to_screen('Aborting remaining downloads') + return 101 + + +def main(argv=None): + global _IN_CLI + _IN_CLI = True + try: + _exit(*variadic(_real_main(argv))) + except DownloadError: + _exit(1) + except SameFileError as e: + _exit(f'ERROR: {e}') + except KeyboardInterrupt: + _exit('\nERROR: Interrupted by 
user') + except BrokenPipeError as e: + # https://docs.python.org/3/library/signal.html#note-on-sigpipe + devnull = os.open(os.devnull, os.O_WRONLY) + os.dup2(devnull, sys.stdout.fileno()) + _exit(f'\nERROR: {e}') + except optparse.OptParseError as e: + _exit(2, f'\n{e}') + + +from .extractor import gen_extractors, list_extractors + +__all__ = [ + 'main', + 'YoutubeDL', + 'parse_options', + 'gen_extractors', + 'list_extractors', +] diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py new file mode 100644 index 0000000..06c3920 --- /dev/null +++ b/yt_dlp/__main__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 + +# Execute with +# $ python3 -m yt_dlp + +import sys + +if __package__ is None and not getattr(sys, 'frozen', False): + # direct call of __main__.py + import os.path + path = os.path.realpath(os.path.abspath(__file__)) + sys.path.insert(0, os.path.dirname(os.path.dirname(path))) + +import yt_dlp + +if __name__ == '__main__': + yt_dlp.main() diff --git a/yt_dlp/__pyinstaller/__init__.py b/yt_dlp/__pyinstaller/__init__.py new file mode 100644 index 0000000..1c52aad --- /dev/null +++ b/yt_dlp/__pyinstaller/__init__.py @@ -0,0 +1,5 @@ +import os + + +def get_hook_dirs(): + return [os.path.dirname(__file__)] diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py new file mode 100644 index 0000000..7c3dbfb --- /dev/null +++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py @@ -0,0 +1,34 @@ +import sys + +from PyInstaller.utils.hooks import collect_submodules + + +def pycryptodome_module(): + try: + import Cryptodome # noqa: F401 + except ImportError: + try: + import Crypto # noqa: F401 + print('WARNING: Using Crypto since Cryptodome is not available. ' + 'Install with: python3 -m pip install pycryptodomex', file=sys.stderr) + return 'Crypto' + except ImportError: + pass + return 'Cryptodome' + + +def get_hidden_imports(): + yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated') + yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated') + yield pycryptodome_module() + # Only `websockets` is required, others are collected just in case + for module in ('websockets', 'requests', 'urllib3'): + yield from collect_submodules(module) + # These are auto-detected, but explicitly add them just in case + yield from ('mutagen', 'brotli', 'certifi', 'secretstorage') + + +hiddenimports = list(get_hidden_imports()) +print(f'Adding imports: {hiddenimports}') + +excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle'] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py new file mode 100644 index 0000000..b3a383c --- /dev/null +++ b/yt_dlp/aes.py @@ -0,0 +1,567 @@ +import base64 +from math import ceil + +from .compat import compat_ord +from .dependencies import Cryptodome +from .utils import bytes_to_intlist, intlist_to_bytes + +if Cryptodome.AES: + def aes_cbc_decrypt_bytes(data, key, iv): + """ Decrypt bytes with AES-CBC using pycryptodome """ + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data) + + def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): + """ Decrypt bytes with AES-GCM using pycryptodome """ + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + +else: + def aes_cbc_decrypt_bytes(data, key, iv): + """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ + return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) + + def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): + """ Decrypt 
bytes with AES-GCM using native implementation since pycryptodome is unavailable """ + return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) + + +def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): + return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) + + +BLOCK_SIZE_BYTES = 16 + + +def unpad_pkcs7(data): + return data[:-compat_ord(data[-1])] + + +def pkcs7_padding(data): + """ + PKCS#7 padding + + @param {int[]} data cleartext + @returns {int[]} padded data + """ + + remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES + return data + [remaining_length] * remaining_length + + +def pad_block(block, padding_mode): + """ + Pad a block with the given padding mode + @param {int[]} block block to pad + @param padding_mode padding mode + """ + padding_size = BLOCK_SIZE_BYTES - len(block) + + PADDING_BYTE = { + 'pkcs7': padding_size, + 'iso7816': 0x0, + 'whitespace': 0x20, + 'zero': 0x0, + } + + if padding_size < 0: + raise ValueError('Block size exceeded') + elif padding_mode not in PADDING_BYTE: + raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') + + if padding_mode == 'iso7816' and padding_size: + block = block + [0x80] # NB: += mutates list + padding_size -= 1 + + return block + [PADDING_BYTE[padding_mode]] * padding_size + + +def aes_ecb_encrypt(data, key, iv=None): + """ + Encrypt with aes in ECB mode. Using PKCS#7 padding + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key) + + return encrypted_data + + +def aes_ecb_decrypt(data, key, iv=None): + """ + Decrypt with aes in ECB mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_decrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ctr_decrypt(data, key, iv): + """ + Decrypt with aes in counter mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte initialization vector + @returns {int[]} decrypted data + """ + return aes_ctr_encrypt(data, key, iv) + + +def aes_ctr_encrypt(data, key, iv): + """ + Encrypt with aes in counter mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte initialization vector + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + counter = iter_vector(iv) + + encrypted_data = [] + for i in range(block_count): + counter_block = next(counter) + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block += [0] * (BLOCK_SIZE_BYTES - len(block)) + + cipher_counter_block = aes_encrypt(counter_block, expanded_key) + encrypted_data += xor(block, cipher_counter_block) + encrypted_data = 
encrypted_data[:len(data)] + + return encrypted_data + + +def aes_cbc_decrypt(data, key, iv): + """ + Decrypt with aes in CBC mode + + @param {int[]} data cipher + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + decrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block += [0] * (BLOCK_SIZE_BYTES - len(block)) + + decrypted_block = aes_decrypt(block, expanded_key) + decrypted_data += xor(decrypted_block, previous_cipher_block) + previous_cipher_block = block + decrypted_data = decrypted_data[:len(data)] + + return decrypted_data + + +def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): + """ + Encrypt with aes in CBC mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv 16-Byte IV + @param padding_mode Padding mode to use + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + previous_cipher_block = iv + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + block = pad_block(block, padding_mode) + + mixed_block = xor(block, previous_cipher_block) + + encrypted_block = aes_encrypt(mixed_block, expanded_key) + encrypted_data += encrypted_block + + previous_cipher_block = encrypted_block + + return encrypted_data + + +def aes_gcm_decrypt_and_verify(data, key, tag, nonce): + """ + Decrypt with aes in GCM mode and check authenticity using tag + + @param {int[]} data cipher + @param {int[]} key 16-Byte cipher key + @param {int[]} tag authentication tag + @param {int[]} nonce IV (recommended 12-Byte) + @returns {int[]} decrypted data + """ + + # XXX: check aes, gcm param + + hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) + + if len(nonce) == 12: + j0 = nonce + [0, 0, 0, 1] + else: + fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 + ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) + j0 = ghash(hash_subkey, ghash_in) + + # TODO: add nonce support to aes_ctr_decrypt + + # nonce_ctr = j0[:12] + iv_ctr = inc(j0) + + decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) + pad_len = len(data) // 16 * 16 + s_tag = ghash( + hash_subkey, + data + + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + + ((len(data) * 8).to_bytes(8, 'big'))) # length of data + ) + + if tag != aes_ctr_encrypt(s_tag, key, j0): + raise ValueError('Mismatching authentication tag') + + return decrypted_data + + +def aes_encrypt(data, expanded_key): + """ + Encrypt one block with aes + + @param {int[]} data 16-Byte state + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte cipher + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + for i in range(1, rounds + 1): + data = sub_bytes(data) + data = shift_rows(data) + if i != rounds: + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX)) + data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) + + return data + + +def aes_decrypt(data, expanded_key): + """ + Decrypt one block with aes + + 
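Round keys are consumed in reverse order and each step applies the inverse + transform of the corresponding aes_encrypt step (FIPS-197 InvCipher). + + 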
@param {int[]} data 16-Byte cipher + @param {int[]} expanded_key 176/208/240-Byte expanded key + @returns {int[]} 16-Byte state + """ + rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 + + for i in range(rounds, 0, -1): + data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) + if i != rounds: + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) + data = shift_rows_inv(data) + data = sub_bytes_inv(data) + data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) + + return data + + +def aes_decrypt_text(data, password, key_size_bytes): + """ + Decrypt text + - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter + - The cipher key is retrieved by encrypting the first 16 Byte of 'password' + with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's) + - Mode of operation is 'counter' + + @param {str} data Base64 encoded string + @param {str,unicode} password Password (will be encoded with utf-8) + @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit + @returns {str} Decrypted data + """ + NONCE_LENGTH_BYTES = 8 + + data = bytes_to_intlist(base64.b64decode(data)) + password = bytes_to_intlist(password.encode()) + + key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) + key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) + + nonce = data[:NONCE_LENGTH_BYTES] + cipher = data[NONCE_LENGTH_BYTES:] + + decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) + plaintext = intlist_to_bytes(decrypted_data) + + return plaintext + + +RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) +SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, + 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, + 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, + 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, + 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, + 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, + 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, + 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, + 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, + 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, + 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, + 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, + 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, + 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, + 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, + 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) +SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 
0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) +MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1), + (0x1, 0x2, 0x3, 0x1), + (0x1, 0x1, 0x2, 0x3), + (0x3, 0x1, 0x1, 0x2)) +MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9), + (0x9, 0xE, 0xB, 0xD), + (0xD, 0x9, 0xE, 0xB), + (0xB, 0xD, 0x9, 0xE)) +RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, + 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, + 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, + 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, + 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, + 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, + 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, + 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, + 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, + 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, + 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, + 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, + 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, + 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, + 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, + 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) +RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, + 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, + 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, + 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 
0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, + 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, + 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, + 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, + 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, + 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, + 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, + 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, + 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, + 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, + 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, + 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, + 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) + + +def key_expansion(data): + """ + Generate key schedule + + @param {int[]} data 16/24/32-Byte cipher key + @returns {int[]} 176/208/240-Byte expanded key + """ + data = data[:] # copy + rcon_iteration = 1 + key_size_bytes = len(data) + expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES + + while len(data) < expanded_key_size_bytes: + temp = data[-4:] + temp = key_schedule_core(temp, rcon_iteration) + rcon_iteration += 1 + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + + for _ in range(3): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + + if key_size_bytes == 32: + temp = data[-4:] + temp = sub_bytes(temp) + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + + for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): + temp = data[-4:] + data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) + data = data[:expanded_key_size_bytes] + + return data + + +def iter_vector(iv): + while True: + yield iv + iv = inc(iv) + + +def sub_bytes(data): + return [SBOX[x] for x in data] + + +def sub_bytes_inv(data): + return [SBOX_INV[x] for x in data] + + +def rotate(data): + return data[1:] + [data[0]] + + +def key_schedule_core(data, rcon_iteration): + data = rotate(data) + data = sub_bytes(data) + data[0] = data[0] ^ RCON[rcon_iteration] + + return data + + +def xor(data1, data2): + return [x ^ y for x, y in zip(data1, data2)] + + +def iter_mix_columns(data, matrix): + for i in (0, 4, 8, 12): + for row in matrix: + mixed = 0 + for j in range(4): + # xor is (+) and (-) + mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else + RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF]) + yield mixed + + +def shift_rows(data): + return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)] + + +def shift_rows_inv(data): + return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)] + + +def shift_block(data): + data_shifted = [] + + bit = 0 + for n in data: + if bit: + n |= 0x100 + bit = n & 1 + n >>= 1 + data_shifted.append(n) + + return data_shifted + + +def inc(data): + data = data[:] # copy + for i in range(len(data) - 1, -1, -1): + if data[i] == 255: + data[i] = 0 + else: + data[i] = data[i] + 1 + break + 
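# The list is treated as a big-endian counter with carry, e.g. inc([0, 0, 255]) == [0, 1, 0]; + # callers use this to advance the CTR keystream and to step J0 for the GCM payload. + 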
return data + + +def block_product(block_x, block_y): + # NIST SP 800-38D, Algorithm 1 + + if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: + raise ValueError('Length of blocks needs to be %d bytes' % BLOCK_SIZE_BYTES) + + block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) + block_v = block_y[:] + block_z = [0] * BLOCK_SIZE_BYTES + + for i in block_x: + for bit in range(7, -1, -1): + if i & (1 << bit): + block_z = xor(block_z, block_v) + + do_xor = block_v[-1] & 1 + block_v = shift_block(block_v) + if do_xor: + block_v = xor(block_v, block_r) + + return block_z + + +def ghash(subkey, data): + # NIST SP 800-38D, Algorithm 2 + + if len(data) % BLOCK_SIZE_BYTES: + raise ValueError('Length of data needs to be a multiple of %d bytes' % BLOCK_SIZE_BYTES) + + last_y = [0] * BLOCK_SIZE_BYTES + for i in range(0, len(data), BLOCK_SIZE_BYTES): + block = data[i: i + BLOCK_SIZE_BYTES] + last_y = block_product(xor(last_y, block), subkey) + + return last_y + + +__all__ = [ + 'aes_cbc_decrypt', + 'aes_cbc_decrypt_bytes', + 'aes_ctr_decrypt', + 'aes_decrypt_text', + 'aes_decrypt', + 'aes_ecb_decrypt', + 'aes_gcm_decrypt_and_verify', + 'aes_gcm_decrypt_and_verify_bytes', + + 'aes_cbc_encrypt', + 'aes_cbc_encrypt_bytes', + 'aes_ctr_encrypt', + 'aes_ecb_encrypt', + 'aes_encrypt', + + 'key_expansion', + 'pad_block', + 'pkcs7_padding', + 'unpad_pkcs7', +] diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py new file mode 100644 index 0000000..9dd4f2f --- /dev/null +++ b/yt_dlp/cache.py @@ -0,0 +1,91 @@ +import contextlib +import json +import os +import re +import shutil +import traceback +import urllib.parse + +from .utils import expand_path, traverse_obj, version_tuple, write_json_file +from .version import __version__ + + +class Cache: + def __init__(self, ydl): + self._ydl = ydl + + def _get_root_dir(self): + res = self._ydl.params.get('cachedir') + if res is None: + cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache') + res = os.path.join(cache_root, 'yt-dlp') + return expand_path(res) + + def _get_cache_fn(self, section, key, dtype): + assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}' + key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters + return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}') + + @property + def enabled(self): + return self._ydl.params.get('cachedir') is not False + + def store(self, section, key, data, dtype='json'): + assert dtype in ('json',) + + if not self.enabled: + return + + fn = self._get_cache_fn(section, key, dtype) + try: + os.makedirs(os.path.dirname(fn), exist_ok=True) + self._ydl.write_debug(f'Saving {section}.{key} to cache') + write_json_file({'yt-dlp_version': __version__, 'data': data}, fn) + except Exception: + tb = traceback.format_exc() + self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}') + + def _validate(self, data, min_ver): + version = traverse_obj(data, 'yt-dlp_version') + if not version: # Backward compatibility + data, version = {'data': data}, '2022.08.19' + if not min_ver or version_tuple(version) >= version_tuple(min_ver): + return data['data'] + self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})') + + def load(self, section, key, dtype='json', default=None, *, min_ver=None): + assert dtype in ('json',) + + if not self.enabled: + return default + + cache_fn = self._get_cache_fn(section, key, dtype) + with contextlib.suppress(OSError): + try: + with open(cache_fn, encoding='utf-8') as cachef: + self._ydl.write_debug(f'Loading {section}.{key} 
from cache') + return self._validate(json.load(cachef), min_ver) + except (ValueError, KeyError): + try: + file_size = os.path.getsize(cache_fn) + except OSError as oe: + file_size = str(oe) + self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})') + + return default + + def remove(self): + if not self.enabled: + self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)') + return + + cachedir = self._get_root_dir() + if not any((term in cachedir) for term in ('cache', 'tmp')): + raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + + self._ydl.to_screen( + 'Removing cache dir %s .' % cachedir, skip_eol=True) + if os.path.exists(cachedir): + self._ydl.to_screen('.', skip_eol=True) + shutil.rmtree(cachedir) + self._ydl.to_screen('.') diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py new file mode 100644 index 0000000..5ad5c70 --- /dev/null +++ b/yt_dlp/compat/__init__.py @@ -0,0 +1,79 @@ +import os +import sys +import xml.etree.ElementTree as etree + +from .compat_utils import passthrough_module + +passthrough_module(__name__, '._deprecated') +del passthrough_module + + +# HTMLParseError has been deprecated in Python 3.3 and removed in +# Python 3.5. Introducing dummy exception for Python >3.5 for compatible +# and uniform cross-version exception handling +class compat_HTMLParseError(ValueError): + pass + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + + +def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) + + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': + def compat_shlex_quote(s): + import re + return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""') +else: + from shlex import quote as compat_shlex_quote # noqa: F401 + + +def compat_ord(c): + return c if isinstance(c, int) else ord(c) + + +if compat_os_name == 'nt' and sys.version_info < (3, 8): + # os.path.realpath on Windows does not follow symbolic links + # prior to Python 3.8 (see https://bugs.python.org/issue9949) + def compat_realpath(path): + while os.path.islink(path): + path = os.path.abspath(os.readlink(path)) + return os.path.realpath(path) +else: + compat_realpath = os.path.realpath + + +# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl +# See https://github.com/yt-dlp/yt-dlp/issues/792 +# https://docs.python.org/3/library/os.path.html#os.path.expanduser +if compat_os_name in ('nt', 'ce'): + def compat_expanduser(path): + HOME = os.environ.get('HOME') + if not HOME: + return os.path.expanduser(path) + elif not path.startswith('~'): + return path + i = path.replace('\\', '/', 1).find('/') # ~user + if i < 0: + i = len(path) + userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME + return userhome + path[i:] +else: + compat_expanduser = os.path.expanduser + + +def urllib_req_to_req(urllib_request): + """Convert urllib Request to a networking Request""" + from ..networking import Request + from ..utils.networking import HTTPHeaderDict + return Request( + urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(), + headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs), + extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None) diff --git a/yt_dlp/compat/_deprecated.py 
b/yt_dlp/compat/_deprecated.py new file mode 100644 index 0000000..607bae9 --- /dev/null +++ b/yt_dlp/compat/_deprecated.py @@ -0,0 +1,23 @@ +"""Deprecated - New code should avoid these""" +import warnings + +from .compat_utils import passthrough_module + +# XXX: Implement this the same way as other DeprecationWarnings without circular import +passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn( + DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) +del passthrough_module + +import base64 +import urllib.error +import urllib.parse + +compat_str = str + +compat_b64decode = base64.b64decode + +compat_urlparse = urllib.parse +compat_parse_qs = urllib.parse.parse_qs +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py new file mode 100644 index 0000000..7ea5d08 --- /dev/null +++ b/yt_dlp/compat/_legacy.py @@ -0,0 +1,108 @@ +""" Do not use! """ + +import base64 +import collections +import ctypes +import getpass +import html.entities +import html.parser +import http.client +import http.cookiejar +import http.cookies +import http.server +import itertools +import os +import shlex +import shutil +import socket +import struct +import subprocess +import tokenize +import urllib.error +import urllib.parse +import urllib.request +import xml.etree.ElementTree as etree + +# isort: split +import asyncio # noqa: F401 +import re # noqa: F401 +from asyncio import run as compat_asyncio_run # noqa: F401 +from re import Pattern as compat_Pattern # noqa: F401 +from re import match as compat_Match # noqa: F401 + +from . import compat_expanduser, compat_HTMLParseError, compat_realpath +from .compat_utils import passthrough_module +from ..dependencies import brotli as compat_brotli # noqa: F401 +from ..dependencies import websockets as compat_websockets # noqa: F401 +from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 +from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401 + +passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) + + +# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE +# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines +def compat_ctypes_WINFUNCTYPE(*args, **kwargs): + return ctypes.WINFUNCTYPE(*args, **kwargs) + + +def compat_setenv(key, value, env=os.environ): + env[key] = value + + +compat_base64_b64decode = base64.b64decode +compat_basestring = str +compat_casefold = str.casefold +compat_chr = chr +compat_collections_abc = collections.abc +compat_cookiejar = compat_http_cookiejar = http.cookiejar +compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie +compat_cookies = compat_http_cookies = http.cookies +compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie +compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element +compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace +compat_filter = filter +compat_get_terminal_size = shutil.get_terminal_size +compat_getenv = os.getenv +compat_getpass = compat_getpass_getpass = getpass.getpass +compat_html_entities = html.entities +compat_html_entities_html5 = html.entities.html5 +compat_html_parser_HTMLParseError = compat_HTMLParseError +compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser 
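+# All assignments below are plain aliases kept so youtube-dl-era code keeps working;
+# accessing them through yt_dlp.compat is routed via passthrough_module and emits the
+# DeprecationWarning installed in _deprecated.py, while direct imports stay silent.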
+compat_http_client = http.client +compat_http_server = http.server +compat_input = input +compat_integer_types = (int, ) +compat_itertools_count = itertools.count +compat_kwargs = lambda kwargs: kwargs +compat_map = map +compat_numeric_types = (int, float, complex) +compat_os_path_expanduser = compat_expanduser +compat_os_path_realpath = compat_realpath +compat_print = print +compat_shlex_split = shlex.split +compat_socket_create_connection = socket.create_connection +compat_Struct = struct.Struct +compat_struct_pack = struct.pack +compat_struct_unpack = struct.unpack +compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL +compat_tokenize_tokenize = tokenize.tokenize +compat_urllib_error = urllib.error +compat_urllib_HTTPError = compat_HTTPError +compat_urllib_parse = urllib.parse +compat_urllib_parse_parse_qs = urllib.parse.parse_qs +compat_urllib_parse_quote = urllib.parse.quote +compat_urllib_parse_quote_plus = urllib.parse.quote_plus +compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus +compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes +compat_urllib_parse_urlunparse = urllib.parse.urlunparse +compat_urllib_request = urllib.request +compat_urllib_request_DataHandler = urllib.request.DataHandler +compat_urllib_response = urllib.response +compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve +compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError +compat_xpath = lambda xpath: xpath +compat_zip = zip +workaround_optparse_bug9161 = lambda: None + +legacy = [] diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py new file mode 100644 index 0000000..d62b7d0 --- /dev/null +++ b/yt_dlp/compat/compat_utils.py @@ -0,0 +1,83 @@ +import collections +import contextlib +import functools +import importlib +import sys +import types + +_NO_ATTRIBUTE = object() + +_Package = collections.namedtuple('Package', ('name', 'version')) + + +def get_package_info(module): + return _Package( + name=getattr(module, '_yt_dlp__identifier', module.__name__), + version=str(next(filter(None, ( + getattr(module, attr, None) + for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version') + )), None))) + + +def _is_package(module): + return '__path__' in vars(module) + + +def _is_dunder(name): + return name.startswith('__') and name.endswith('__') + + +class EnhancedModule(types.ModuleType): + def __bool__(self): + return vars(self).get('__bool__', lambda: True)() + + def __getattribute__(self, attr): + try: + ret = super().__getattribute__(attr) + except AttributeError: + if _is_dunder(attr): + raise + getter = getattr(self, '__getattr__', None) + if not getter: + raise + ret = getter(attr) + return ret.fget() if isinstance(ret, property) else ret + + +def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=lambda _: None): + """Passthrough parent module into a child module, creating the parent if necessary""" + def __getattr__(attr): + if _is_package(parent): + with contextlib.suppress(ModuleNotFoundError): + return importlib.import_module(f'.{attr}', parent.__name__) + + ret = from_child(attr) + if ret is _NO_ATTRIBUTE: + raise AttributeError(f'module {parent.__name__} has no attribute {attr}') + callback(attr) + return ret + + @functools.lru_cache(maxsize=None) + def from_child(attr): + nonlocal child + if attr not in allowed_attributes: + if ... 
not in allowed_attributes or _is_dunder(attr): + return _NO_ATTRIBUTE + + if isinstance(child, str): + child = importlib.import_module(child, parent.__name__) + + if _is_package(child): + with contextlib.suppress(ImportError): + return passthrough_module(f'{parent.__name__}.{attr}', + importlib.import_module(f'.{attr}', child.__name__)) + + with contextlib.suppress(AttributeError): + return getattr(child, attr) + + return _NO_ATTRIBUTE + + parent = sys.modules.get(parent, types.ModuleType(parent)) + parent.__class__ = EnhancedModule + parent.__getattr__ = __getattr__ + return parent diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py new file mode 100644 index 0000000..36c9836 --- /dev/null +++ b/yt_dlp/compat/functools.py @@ -0,0 +1,12 @@ +# flake8: noqa: F405 +from functools import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'functools') +del passthrough_module + +try: + cache # >= 3.9 +except NameError: + cache = lru_cache(maxsize=None) diff --git a/yt_dlp/compat/imghdr.py b/yt_dlp/compat/imghdr.py new file mode 100644 index 0000000..5d64ab0 --- /dev/null +++ b/yt_dlp/compat/imghdr.py @@ -0,0 +1,16 @@ +tests = { + 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP', + 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n', + 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'), + 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'), +} + + +def what(file=None, h=None): + """Detect format of image (Currently supports jpeg, png, webp, gif only) + Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py + """ + if h is None: + with open(file, 'rb') as f: + h = f.read(12) + return next((type_ for type_, test in tests.items() if test(h)), None) diff --git a/yt_dlp/compat/shutil.py b/yt_dlp/compat/shutil.py new file mode 100644 index 0000000..23239d5 --- /dev/null +++ b/yt_dlp/compat/shutil.py @@ -0,0 +1,30 @@ +# flake8: noqa: F405 +from shutil import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'shutil') +del passthrough_module + + +import sys + +if sys.platform.startswith('freebsd'): + import errno + import os + import shutil + + # Workaround for PermissionError when using restricted ACL mode on FreeBSD + def copy2(src, dst, *args, **kwargs): + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + shutil.copyfile(src, dst, *args, **kwargs) + try: + shutil.copystat(src, dst, *args, **kwargs) + except PermissionError as e: + if e.errno != getattr(errno, 'EPERM', None): + raise + return dst + + def move(*args, copy_function=copy2, **kwargs): + return shutil.move(*args, copy_function=copy_function, **kwargs) diff --git a/yt_dlp/compat/types.py b/yt_dlp/compat/types.py new file mode 100644 index 0000000..4aa3b0e --- /dev/null +++ b/yt_dlp/compat/types.py @@ -0,0 +1,13 @@ +# flake8: noqa: F405 +from types import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'types') +del passthrough_module + +try: + # NB: pypy has builtin NoneType, so checking NameError won't work + from types import NoneType # >= 3.10 +except ImportError: + NoneType = type(None) diff --git a/yt_dlp/compat/urllib/__init__.py b/yt_dlp/compat/urllib/__init__.py new file mode 100644 index 0000000..9084b3c --- /dev/null +++ b/yt_dlp/compat/urllib/__init__.py @@ -0,0 +1,10 @@ +# flake8: noqa: F405 +from urllib import * # noqa: F403 + +del request # noqa: F821 +from . 
import request # noqa: F401 + +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'urllib') +del passthrough_module diff --git a/yt_dlp/compat/urllib/request.py b/yt_dlp/compat/urllib/request.py new file mode 100644 index 0000000..ad9fa83 --- /dev/null +++ b/yt_dlp/compat/urllib/request.py @@ -0,0 +1,40 @@ +# flake8: noqa: F405 +from urllib.request import * # noqa: F403 + +from ..compat_utils import passthrough_module + +passthrough_module(__name__, 'urllib.request') +del passthrough_module + + +from .. import compat_os_name + +if compat_os_name == 'nt': + # On older Python versions, proxies are extracted from Windows registry erroneously. [1] + # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] + # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade + # it to http on these older Python versions to avoid issues + # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported. + # 1: https://github.com/python/cpython/issues/86793 + # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698 + import sys + from urllib.request import getproxies_environment, getproxies_registry + + def getproxies_registry_patched(): + proxies = getproxies_registry() + if ( + sys.version_info >= (3, 10, 5) # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final + or (3, 9, 13) <= sys.version_info < (3, 10) # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final + ): + return proxies + + for scheme in ('https', 'ftp'): + if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'): + proxies[scheme] = 'http' + proxies[scheme][len(scheme):] + + return proxies + + def getproxies(): + return getproxies_environment() or getproxies_registry_patched() + +del compat_os_name diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py new file mode 100644 index 0000000..28d174a --- /dev/null +++ b/yt_dlp/cookies.py @@ -0,0 +1,1346 @@ +import base64 +import collections +import contextlib +import glob +import http.cookiejar +import http.cookies +import io +import json +import os +import re +import shutil +import struct +import subprocess +import sys +import tempfile +import time +import urllib.request +from datetime import datetime, timedelta, timezone +from enum import Enum, auto +from hashlib import pbkdf2_hmac + +from .aes import ( + aes_cbc_decrypt_bytes, + aes_gcm_decrypt_and_verify_bytes, + unpad_pkcs7, +) +from .compat import functools # isort: split +from .compat import compat_os_name +from .dependencies import ( + _SECRETSTORAGE_UNAVAILABLE_REASON, + secretstorage, + sqlite3, +) +from .minicurses import MultilinePrinter, QuietMultilinePrinter +from .utils import ( + DownloadError, + Popen, + error_to_str, + expand_path, + is_path_like, + sanitize_url, + str_or_none, + try_call, + write_string, +) +from .utils._utils import _YDLLogger +from .utils.networking import normalize_url + +CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} +SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} + + +class YDLLogger(_YDLLogger): + def warning(self, message, only_once=False): # compat + return super().warning(message, once=only_once) + + class ProgressBar(MultilinePrinter): + _DELAY, _timer = 0.1, 0 + + def print(self, message): + if time.time() - self._timer > self._DELAY: + self.print_at_line(f'[Cookies] {message}', 0) + self._timer 
= time.time() + + def progress_bar(self): + """Return a context manager with a print method. (Optional)""" + # Do not print to files/pipes, loggers, or when --no-progress is used + if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'): + return + file = self._ydl._out_files.error + try: + if not file.isatty(): + return + except BaseException: + return + return self.ProgressBar(file, preserve_output=False) + + +def _create_progress_bar(logger): + if hasattr(logger, 'progress_bar'): + printer = logger.progress_bar() + if printer: + return printer + printer = QuietMultilinePrinter() + printer.print = lambda _: None + return printer + + +def load_cookies(cookie_file, browser_specification, ydl): + cookie_jars = [] + if browser_specification is not None: + browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) + cookie_jars.append( + extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) + + if cookie_file is not None: + is_filename = is_path_like(cookie_file) + if is_filename: + cookie_file = expand_path(cookie_file) + + jar = YoutubeDLCookieJar(cookie_file) + if not is_filename or os.access(cookie_file, os.R_OK): + jar.load() + cookie_jars.append(jar) + + return _merge_cookie_jars(cookie_jars) + + +def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None): + if browser_name == 'firefox': + return _extract_firefox_cookies(profile, container, logger) + elif browser_name == 'safari': + return _extract_safari_cookies(profile, logger) + elif browser_name in CHROMIUM_BASED_BROWSERS: + return _extract_chrome_cookies(browser_name, profile, keyring, logger) + else: + raise ValueError(f'unknown browser: {browser_name}') + + +def _extract_firefox_cookies(profile, container, logger): + logger.info('Extracting cookies from firefox') + if not sqlite3: + logger.warning('Cannot extract cookies from firefox without sqlite3 support. 
' + 'Please use a Python interpreter compiled with sqlite3 support') + return YoutubeDLCookieJar() + + if profile is None: + search_roots = list(_firefox_browser_dirs()) + elif _is_path(profile): + search_roots = [profile] + else: + search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()] + search_root = ', '.join(map(repr, search_roots)) + + cookie_database_path = _newest(_firefox_cookie_dbs(search_roots)) + if cookie_database_path is None: + raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') + logger.debug(f'Extracting cookies from: "{cookie_database_path}"') + + container_id = None + if container not in (None, 'none'): + containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json') + if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK): + raise FileNotFoundError(f'could not read containers.json in {search_root}') + with open(containers_path, encoding='utf8') as containers: + identities = json.load(containers).get('identities', []) + container_id = next((context.get('userContextId') for context in identities if container in ( + context.get('name'), + try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) + )), None) + if not isinstance(container_id, int): + raise ValueError(f'could not find firefox container "{container}" in containers.json') + + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + if isinstance(container_id, int): + logger.debug( + f'Only loading cookies from firefox container "{container}", ID {container_id}') + cursor.execute( + 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? 
OR originAttributes LIKE ?', + (f'%userContextId={container_id}', f'%userContextId={container_id}&%')) + elif container == 'none': + logger.debug('Only loading cookies not belonging to any container') + cursor.execute( + 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")') + else: + cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') + jar = YoutubeDLCookieJar() + with _create_progress_bar(logger) as progress_bar: + table = cursor.fetchall() + total_cookie_count = len(table) + for i, (host, name, value, path, expiry, is_secure) in enumerate(table): + progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') + cookie = http.cookiejar.Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + logger.info(f'Extracted {len(jar)} cookies from firefox') + return jar + finally: + if cursor is not None: + cursor.connection.close() + + +def _firefox_browser_dirs(): + if sys.platform in ('cygwin', 'win32'): + yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') + + elif sys.platform == 'darwin': + yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles') + + else: + yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox')) + + +def _firefox_cookie_dbs(roots): + for root in map(os.path.abspath, roots): + for pattern in ('', '*/', 'Profiles/*/'): + yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite')) + + +def _get_chromium_based_browser_settings(browser_name): + # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md + if sys.platform in ('cygwin', 'win32'): + appdata_local = os.path.expandvars('%LOCALAPPDATA%') + appdata_roaming = os.path.expandvars('%APPDATA%') + browser_dir = { + 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'), + 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'), + 'chromium': os.path.join(appdata_local, R'Chromium\User Data'), + 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), + 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), + 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), + }[browser_name] + + elif sys.platform == 'darwin': + appdata = os.path.expanduser('~/Library/Application Support') + browser_dir = { + 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(appdata, 'Google/Chrome'), + 'chromium': os.path.join(appdata, 'Chromium'), + 'edge': os.path.join(appdata, 'Microsoft Edge'), + 'opera': os.path.join(appdata, 'com.operasoftware.Opera'), + 'vivaldi': os.path.join(appdata, 'Vivaldi'), + }[browser_name] + + else: + config = _config_home() + browser_dir = { + 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(config, 'google-chrome'), + 'chromium': os.path.join(config, 'chromium'), + 'edge': os.path.join(config, 'microsoft-edge'), + 'opera': os.path.join(config, 'opera'), + 'vivaldi': os.path.join(config, 'vivaldi'), + }[browser_name] + + # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + keyring_name = { + 'brave': 
'Brave', + 'chrome': 'Chrome', + 'chromium': 'Chromium', + 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', + 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', + 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', + }[browser_name] + + browsers_without_profiles = {'opera'} + + return { + 'browser_dir': browser_dir, + 'keyring_name': keyring_name, + 'supports_profiles': browser_name not in browsers_without_profiles + } + + +def _extract_chrome_cookies(browser_name, profile, keyring, logger): + logger.info(f'Extracting cookies from {browser_name}') + + if not sqlite3: + logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. ' + 'Please use a Python interpreter compiled with sqlite3 support') + return YoutubeDLCookieJar() + + config = _get_chromium_based_browser_settings(browser_name) + + if profile is None: + search_root = config['browser_dir'] + elif _is_path(profile): + search_root = profile + config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile + else: + if config['supports_profiles']: + search_root = os.path.join(config['browser_dir'], profile) + else: + logger.error(f'{browser_name} does not support profiles') + search_root = config['browser_dir'] + + cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger)) + if cookie_database_path is None: + raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') + logger.debug(f'Extracting cookies from: "{cookie_database_path}"') + + decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) + + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor.connection.text_factory = bytes + column_names = _get_column_names(cursor, 'cookies') + secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' + cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies') + jar = YoutubeDLCookieJar() + failed_cookies = 0 + unencrypted_cookies = 0 + with _create_progress_bar(logger) as progress_bar: + table = cursor.fetchall() + total_cookie_count = len(table) + for i, line in enumerate(table): + progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') + is_encrypted, cookie = _process_chrome_cookie(decryptor, *line) + if not cookie: + failed_cookies += 1 + continue + elif not is_encrypted: + unencrypted_cookies += 1 + jar.set_cookie(cookie) + if failed_cookies > 0: + failed_message = f' ({failed_cookies} could not be decrypted)' + else: + failed_message = '' + logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}') + counts = decryptor._cookie_counts.copy() + counts['unencrypted'] = unencrypted_cookies + logger.debug(f'cookie version breakdown: {counts}') + return jar + except PermissionError as error: + if compat_os_name == 'nt' and error.errno == 13: + message = 'Could not copy Chrome cookie database. 
See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info' + logger.error(message) + raise DownloadError(message) # force exit + raise + finally: + if cursor is not None: + cursor.connection.close() + + +def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure): + host_key = host_key.decode() + name = name.decode() + value = value.decode() + path = path.decode() + is_encrypted = not value and encrypted_value + + if is_encrypted: + value = decryptor.decrypt(encrypted_value) + if value is None: + return is_encrypted, None + + return is_encrypted, http.cookiejar.Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, + comment=None, comment_url=None, rest={}) + + +class ChromeCookieDecryptor: + """ + Overview: + + Linux: + - cookies are either v10 or v11 + - v10: AES-CBC encrypted with a fixed key + - also attempts empty password if decryption fails + - v11: AES-CBC encrypted with an OS protected key (keyring) + - also attempts empty password if decryption fails + - v11 keys can be stored in various places depending on the activate desktop environment [2] + + Mac: + - cookies are either v10 or not v10 + - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux + - not v10: 'old data' stored as plaintext + + Windows: + - cookies are either v10 or not v10 + - v10: AES-GCM encrypted with a key which is encrypted with DPAPI + - not v10: encrypted with DPAPI + + Sources: + - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/ + - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc + - KeyStorageLinux::CreateService + """ + + _cookie_counts = {} + + def decrypt(self, encrypted_value): + raise NotImplementedError('Must be implemented by sub classes') + + +def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): + if sys.platform == 'darwin': + return MacChromeCookieDecryptor(browser_keyring_name, logger) + elif sys.platform in ('win32', 'cygwin'): + return WindowsChromeCookieDecryptor(browser_root, logger) + return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) + + +class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger, *, keyring=None): + self._logger = logger + self._v10_key = self.derive_key(b'peanuts') + self._empty_key = self.derive_key(b'') + self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} + self._browser_keyring_name = browser_keyring_name + self._keyring = keyring + + @functools.cached_property + def _v11_key(self): + password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger) + return None if password is None else self.derive_key(password) + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16) + + def decrypt(self, encrypted_value): + """ + + following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt + with an empty password. 
The failure detection is not the same as what chromium uses so the + results won't be perfect + + References: + - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/ + - a bugfix to try an empty password as a fallback + """ + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + self._cookie_counts['v10'] += 1 + return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) + + elif version == b'v11': + self._cookie_counts['v11'] += 1 + if self._v11_key is None: + self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) + return None + return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) + + else: + self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) + self._cookie_counts['other'] += 1 + return None + + +class MacChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger): + self._logger = logger + password = _get_mac_keyring_password(browser_keyring_name, logger) + self._v10_key = None if password is None else self.derive_key(password) + self._cookie_counts = {'v10': 0, 'other': 0} + + @staticmethod + def derive_key(password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16) + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + self._cookie_counts['v10'] += 1 + if self._v10_key is None: + self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) + return None + + return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) + + else: + self._cookie_counts['other'] += 1 + # other prefixes are considered 'old data' which were stored as plaintext + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm + return encrypted_value + + +class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_root, logger): + self._logger = logger + self._v10_key = _get_windows_v10_key(browser_root, logger) + self._cookie_counts = {'v10': 0, 'other': 0} + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + self._cookie_counts['v10'] += 1 + if self._v10_key is None: + self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) + return None + + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc + # kNonceLength + nonce_length = 96 // 8 + # boringssl + # EVP_AEAD_AES_GCM_TAG_LEN + authentication_tag_length = 16 + + raw_ciphertext = ciphertext + nonce = raw_ciphertext[:nonce_length] + ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] + authentication_tag = raw_ciphertext[-authentication_tag_length:] + + return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) + + else: + self._cookie_counts['other'] += 1 + # any other prefix means the data is DPAPI encrypted + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc + return _decrypt_windows_dpapi(encrypted_value, self._logger).decode() + + +def _extract_safari_cookies(profile, logger): + if sys.platform != 'darwin': + 
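# Safari's Cookies.binarycookies is a proprietary binary format that exists only on + # macOS; the parsers below follow the layout documented in the dtformats reference + # linked from parse_safari_cookies. + 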
raise ValueError(f'unsupported platform: {sys.platform}') + + if profile: + cookies_path = os.path.expanduser(profile) + if not os.path.isfile(cookies_path): + raise FileNotFoundError('custom safari cookies database not found') + + else: + cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') + + if not os.path.isfile(cookies_path): + logger.debug('Trying secondary cookie location') + cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies') + if not os.path.isfile(cookies_path): + raise FileNotFoundError('could not find safari cookies database') + + with open(cookies_path, 'rb') as f: + cookies_data = f.read() + + jar = parse_safari_cookies(cookies_data, logger=logger) + logger.info(f'Extracted {len(jar)} cookies from safari') + return jar + + +class ParserError(Exception): + pass + + +class DataParser: + def __init__(self, data, logger): + self._data = data + self.cursor = 0 + self._logger = logger + + def read_bytes(self, num_bytes): + if num_bytes < 0: + raise ParserError(f'invalid read of {num_bytes} bytes') + end = self.cursor + num_bytes + if end > len(self._data): + raise ParserError('reached end of input') + data = self._data[self.cursor:end] + self.cursor = end + return data + + def expect_bytes(self, expected_value, message): + value = self.read_bytes(len(expected_value)) + if value != expected_value: + raise ParserError(f'unexpected value: {value} != {expected_value} ({message})') + + def read_uint(self, big_endian=False): + data_format = '>I' if big_endian else '<I' + return struct.unpack(data_format, self.read_bytes(4))[0] + + def read_double(self, big_endian=False): + data_format = '>d' if big_endian else '<d' + return struct.unpack(data_format, self.read_bytes(8))[0] + + def read_cstring(self): + buffer = [] + while True: + c = self.read_bytes(1) + if c == b'\x00': + return b''.join(buffer).decode() + else: + buffer.append(c) + + def skip(self, num_bytes, description='unknown'): + if num_bytes > 0: + self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}') + elif num_bytes < 0: + raise ParserError(f'invalid skip of {num_bytes} bytes') + + def skip_to(self, offset, description='unknown'): + self.skip(offset - self.cursor, description) + + def skip_to_end(self, description='unknown'): + self.skip_to(len(self._data), description) + + +def _mac_absolute_time_to_posix(timestamp): + return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp()) + + +def _parse_safari_cookies_header(data, logger): + p = DataParser(data, logger) + p.expect_bytes(b'cook', 'database signature') + number_of_pages = p.read_uint(big_endian=True) + page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)] + return page_sizes, p.cursor + + +def _parse_safari_cookies_page(data, jar, logger): + p = DataParser(data, logger) + p.expect_bytes(b'\x00\x00\x01\x00', 'page signature') + number_of_cookies = p.read_uint() + record_offsets = [p.read_uint() for _ in range(number_of_cookies)] + if number_of_cookies == 0: + logger.debug(f'a cookies page of size {len(data)} has no cookies') + return + + p.skip_to(record_offsets[0], 'unknown page header field') + + with _create_progress_bar(logger) as progress_bar: + for i, record_offset in enumerate(record_offsets): + progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}') + p.skip_to(record_offset, 'space between records') + record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) + p.read_bytes(record_length) + p.skip_to_end('space in between pages') + + +def _parse_safari_cookies_record(data, jar, logger): + p = DataParser(data, logger) + record_size = p.read_uint() + p.skip(4, 'unknown record field 1') + flags = p.read_uint() + is_secure = bool(flags & 0x0001) + p.skip(4, 'unknown record field 2') + domain_offset = p.read_uint() + name_offset = p.read_uint() + path_offset = 
p.read_uint() + value_offset = p.read_uint() + p.skip(8, 'unknown record field 3') + expiration_date = _mac_absolute_time_to_posix(p.read_double()) + _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841 + + try: + p.skip_to(domain_offset) + domain = p.read_cstring() + + p.skip_to(name_offset) + name = p.read_cstring() + + p.skip_to(path_offset) + path = p.read_cstring() + + p.skip_to(value_offset) + value = p.read_cstring() + except UnicodeDecodeError: + logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True) + return record_size + + p.skip_to(record_size, 'space at the end of the record') + + cookie = http.cookiejar.Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + return record_size + + +def parse_safari_cookies(data, jar=None, logger=YDLLogger()): + """ + References: + - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc + - this data appears to be out of date but the important parts of the database structure is the same + - there are a few bytes here and there which are skipped during parsing + """ + if jar is None: + jar = YoutubeDLCookieJar() + page_sizes, body_start = _parse_safari_cookies_header(data, logger) + p = DataParser(data[body_start:], logger) + for page_size in page_sizes: + _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger) + p.skip_to_end('footer') + return jar + + +class _LinuxDesktopEnvironment(Enum): + """ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h + DesktopEnvironment + """ + OTHER = auto() + CINNAMON = auto() + DEEPIN = auto() + GNOME = auto() + KDE3 = auto() + KDE4 = auto() + KDE5 = auto() + KDE6 = auto() + PANTHEON = auto() + UKUI = auto() + UNITY = auto() + XFCE = auto() + LXQT = auto() + + +class _LinuxKeyring(Enum): + """ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h + SelectedLinuxBackend + """ + KWALLET = auto() # KDE4 + KWALLET5 = auto() + KWALLET6 = auto() + GNOMEKEYRING = auto() + BASICTEXT = auto() + + +SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys() + + +def _get_linux_desktop_environment(env, logger): + """ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc + GetDesktopEnvironment + """ + xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) + desktop_session = env.get('DESKTOP_SESSION', None) + if xdg_current_desktop is not None: + xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() + + if xdg_current_desktop == 'Unity': + if desktop_session is not None and 'gnome-fallback' in desktop_session: + return _LinuxDesktopEnvironment.GNOME + else: + return _LinuxDesktopEnvironment.UNITY + elif xdg_current_desktop == 'Deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif xdg_current_desktop == 'GNOME': + return _LinuxDesktopEnvironment.GNOME + elif xdg_current_desktop == 'X-Cinnamon': + return _LinuxDesktopEnvironment.CINNAMON + elif xdg_current_desktop == 'KDE': + kde_version = env.get('KDE_SESSION_VERSION', None) + if kde_version == '5': + return _LinuxDesktopEnvironment.KDE5 + elif kde_version == '6': + return _LinuxDesktopEnvironment.KDE6 + elif kde_version == '4': + return 
_LinuxDesktopEnvironment.KDE4 + else: + logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') + return _LinuxDesktopEnvironment.KDE4 + elif xdg_current_desktop == 'Pantheon': + return _LinuxDesktopEnvironment.PANTHEON + elif xdg_current_desktop == 'XFCE': + return _LinuxDesktopEnvironment.XFCE + elif xdg_current_desktop == 'UKUI': + return _LinuxDesktopEnvironment.UKUI + elif xdg_current_desktop == 'LXQt': + return _LinuxDesktopEnvironment.LXQT + else: + logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') + + elif desktop_session is not None: + if desktop_session == 'deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif desktop_session in ('mate', 'gnome'): + return _LinuxDesktopEnvironment.GNOME + elif desktop_session in ('kde4', 'kde-plasma'): + return _LinuxDesktopEnvironment.KDE4 + elif desktop_session == 'kde': + if 'KDE_SESSION_VERSION' in env: + return _LinuxDesktopEnvironment.KDE4 + else: + return _LinuxDesktopEnvironment.KDE3 + elif 'xfce' in desktop_session or desktop_session == 'xubuntu': + return _LinuxDesktopEnvironment.XFCE + elif desktop_session == 'ukui': + return _LinuxDesktopEnvironment.UKUI + else: + logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') + + else: + if 'GNOME_DESKTOP_SESSION_ID' in env: + return _LinuxDesktopEnvironment.GNOME + elif 'KDE_FULL_SESSION' in env: + if 'KDE_SESSION_VERSION' in env: + return _LinuxDesktopEnvironment.KDE4 + else: + return _LinuxDesktopEnvironment.KDE3 + return _LinuxDesktopEnvironment.OTHER + + +def _choose_linux_keyring(logger): + """ + SelectBackend in [1] + + There is currently support for forcing chromium to use BASIC_TEXT by creating a file called + `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1]) + does not appear to be called anywhere other than in tests, so the user would have to create this file manually + and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring. + + References: + - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc + """ + desktop_environment = _get_linux_desktop_environment(os.environ, logger) + logger.debug(f'detected desktop environment: {desktop_environment.name}') + if desktop_environment == _LinuxDesktopEnvironment.KDE4: + linux_keyring = _LinuxKeyring.KWALLET + elif desktop_environment == _LinuxDesktopEnvironment.KDE5: + linux_keyring = _LinuxKeyring.KWALLET5 + elif desktop_environment == _LinuxDesktopEnvironment.KDE6: + linux_keyring = _LinuxKeyring.KWALLET6 + elif desktop_environment in ( + _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER + ): + linux_keyring = _LinuxKeyring.BASICTEXT + else: + linux_keyring = _LinuxKeyring.GNOMEKEYRING + return linux_keyring + + +def _get_kwallet_network_wallet(keyring, logger): + """ The name of the wallet used to store network passwords. 
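For the KWALLET5 case, the Popen.run call below is roughly equivalent to this illustrative snippet (service and object names as used in the code below; the result is typically 'kdewallet', though none of this is guaranteed on a given system):

    import subprocess

    name = subprocess.run(
        ['dbus-send', '--session', '--print-reply=literal',
         '--dest=org.kde.kwalletd5', '/modules/kwalletd5',
         'org.kde.KWallet.networkWallet'],
        capture_output=True, text=True).stdout.strip()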
+ + https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc + KWalletDBus::NetworkWallet + which does a dbus call to the following function: + https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html + Wallet::NetworkWallet + """ + default_wallet = 'kdewallet' + try: + if keyring == _LinuxKeyring.KWALLET: + service_name = 'org.kde.kwalletd' + wallet_path = '/modules/kwalletd' + elif keyring == _LinuxKeyring.KWALLET5: + service_name = 'org.kde.kwalletd5' + wallet_path = '/modules/kwalletd5' + elif keyring == _LinuxKeyring.KWALLET6: + service_name = 'org.kde.kwalletd6' + wallet_path = '/modules/kwalletd6' + else: + raise ValueError(keyring) + + stdout, _, returncode = Popen.run([ + 'dbus-send', '--session', '--print-reply=literal', + f'--dest={service_name}', + wallet_path, + 'org.kde.KWallet.networkWallet' + ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + + if returncode: + logger.warning('failed to read NetworkWallet') + return default_wallet + else: + logger.debug(f'NetworkWallet = "{stdout.strip()}"') + return stdout.strip() + except Exception as e: + logger.warning(f'exception while obtaining NetworkWallet: {e}') + return default_wallet + + +def _get_kwallet_password(browser_keyring_name, keyring, logger): + logger.debug(f'using kwallet-query to obtain password from {keyring.name}') + + if shutil.which('kwallet-query') is None: + logger.error('kwallet-query command not found. KWallet and kwallet-query ' + 'must be installed to read from KWallet. kwallet-query should be ' + 'included in the kwallet package for your distribution') + return b'' + + network_wallet = _get_kwallet_network_wallet(keyring, logger) + + try: + stdout, _, returncode = Popen.run([ + 'kwallet-query', + '--read-password', f'{browser_keyring_name} Safe Storage', + '--folder', f'{browser_keyring_name} Keys', + network_wallet + ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + + if returncode: + logger.error(f'kwallet-query failed with return code {returncode}. ' + 'Please consult the kwallet-query man page for details') + return b'' + else: + if stdout.lower().startswith(b'failed to read'): + logger.debug('failed to read password from kwallet. Using empty string instead') + # this sometimes occurs in KDE because chrome does not check hasEntry and instead + # just tries to read the value (which kwallet returns "") whereas kwallet-query + # checks hasEntry. To verify this: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + # while starting chrome. + # this was identified as a bug later and fixed in + # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0 + # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764 + return b'' + else: + logger.debug('password found') + return stdout.rstrip(b'\n') + except Exception as e: + logger.warning(f'exception running kwallet-query: {error_to_str(e)}') + return b'' + + +def _get_gnome_keyring_password(browser_keyring_name, logger): + if not secretstorage: + logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}') + return b'' + # the Gnome keyring does not seem to organise keys in the same way as KWallet; + # by watching `dbus-monitor` during startup, it can be observed that chromium lists all keys + # and presumably searches for its key in the list. It appears that we must do the same. 
+ # https://github.com/jaraco/keyring/issues/556 + with contextlib.closing(secretstorage.dbus_init()) as con: + col = secretstorage.get_default_collection(con) + for item in col.get_all_items(): + if item.get_label() == f'{browser_keyring_name} Safe Storage': + return item.get_secret() + else: + logger.error('failed to read from keyring') + return b'' + + +def _get_linux_keyring_password(browser_keyring_name, keyring, logger): + # note: chrome/chromium can be run with the following flags to determine which keyring backend + # it has chosen to use + # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_ + # Chromium supports a flag: --password-store= so the automatic detection + # will not be sufficient in all cases. + + keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger) + logger.debug(f'Chosen keyring: {keyring.name}') + + if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6): + return _get_kwallet_password(browser_keyring_name, keyring, logger) + elif keyring == _LinuxKeyring.GNOMEKEYRING: + return _get_gnome_keyring_password(browser_keyring_name, logger) + elif keyring == _LinuxKeyring.BASICTEXT: + # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required) + return None + assert False, f'Unknown keyring {keyring}' + + +def _get_mac_keyring_password(browser_keyring_name, logger): + logger.debug('using find-generic-password to obtain password from OSX keychain') + try: + stdout, _, returncode = Popen.run( + ['security', 'find-generic-password', + '-w', # write password to stdout + '-a', browser_keyring_name, # match 'account' + '-s', f'{browser_keyring_name} Safe Storage'], # match 'service' + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + if returncode: + logger.warning('find-generic-password failed') + return None + return stdout.rstrip(b'\n') + except Exception as e: + logger.warning(f'exception running find-generic-password: {error_to_str(e)}') + return None + + +def _get_windows_v10_key(browser_root, logger): + """ + References: + - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc + """ + path = _newest(_find_files(browser_root, 'Local State', logger)) + if path is None: + logger.error('could not find local state file') + return None + logger.debug(f'Found local state file at "{path}"') + with open(path, encoding='utf8') as f: + data = json.load(f) + try: + # kOsCryptEncryptedKeyPrefName in [1] + base64_key = data['os_crypt']['encrypted_key'] + except KeyError: + logger.error('no encrypted key in Local State') + return None + encrypted_key = base64.b64decode(base64_key) + # kDPAPIKeyPrefix in [1] + prefix = b'DPAPI' + if not encrypted_key.startswith(prefix): + logger.error('invalid key') + return None + return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger) + + +def pbkdf2_sha1(password, salt, iterations, key_length): + return pbkdf2_hmac('sha1', password, salt, iterations, key_length) + + +def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): + for key in keys: + plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) + try: + return plaintext.decode() + except UnicodeDecodeError: + pass + logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. 
Possibly the key is wrong?', only_once=True) + return None + + +def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): + try: + plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) + except ValueError: + logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True) + return None + + try: + return plaintext.decode() + except UnicodeDecodeError: + logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + return None + + +def _decrypt_windows_dpapi(ciphertext, logger): + """ + References: + - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata + """ + + import ctypes + import ctypes.wintypes + + class DATA_BLOB(ctypes.Structure): + _fields_ = [('cbData', ctypes.wintypes.DWORD), + ('pbData', ctypes.POINTER(ctypes.c_char))] + + buffer = ctypes.create_string_buffer(ciphertext) + blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer) + blob_out = DATA_BLOB() + ret = ctypes.windll.crypt32.CryptUnprotectData( + ctypes.byref(blob_in), # pDataIn + None, # ppszDataDescr: human readable description of pDataIn + None, # pOptionalEntropy: salt? + None, # pvReserved: must be NULL + None, # pPromptStruct: information about prompts to display + 0, # dwFlags + ctypes.byref(blob_out) # pDataOut + ) + if not ret: + logger.warning('failed to decrypt with DPAPI', only_once=True) + return None + + result = ctypes.string_at(blob_out.pbData, blob_out.cbData) + ctypes.windll.kernel32.LocalFree(blob_out.pbData) + return result + + +def _config_home(): + return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config')) + + +def _open_database_copy(database_path, tmpdir): + # cannot open sqlite databases if they are already in use (e.g. 
by the browser) + database_copy_path = os.path.join(tmpdir, 'temporary.sqlite') + shutil.copy(database_path, database_copy_path) + conn = sqlite3.connect(database_copy_path) + return conn.cursor() + + +def _get_column_names(cursor, table_name): + table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall() + return [row[1].decode() for row in table_info] + + +def _newest(files): + return max(files, key=lambda path: os.lstat(path).st_mtime, default=None) + + +def _find_files(root, filename, logger): + # if there are multiple browser profiles, take the most recently used one + i = 0 + with _create_progress_bar(logger) as progress_bar: + for curr_root, _, files in os.walk(root): + for file in files: + i += 1 + progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched') + if file == filename: + yield os.path.join(curr_root, file) + + +def _merge_cookie_jars(jars): + output_jar = YoutubeDLCookieJar() + for jar in jars: + for cookie in jar: + output_jar.set_cookie(cookie) + if jar.filename is not None: + output_jar.filename = jar.filename + return output_jar + + +def _is_path(value): + return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep) + + +def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None): + if browser_name not in SUPPORTED_BROWSERS: + raise ValueError(f'unsupported browser: "{browser_name}"') + if keyring not in (None, *SUPPORTED_KEYRINGS): + raise ValueError(f'unsupported keyring: "{keyring}"') + if profile is not None and _is_path(expand_path(profile)): + profile = expand_path(profile) + return browser_name, profile, keyring, container + + +class LenientSimpleCookie(http.cookies.SimpleCookie): + """More lenient version of http.cookies.SimpleCookie""" + # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py + # We use Morsel's legal key chars to avoid errors on setting values + _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~') + _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') + + _RESERVED = { + "expires", + "path", + "comment", + "domain", + "max-age", + "secure", + "httponly", + "version", + "samesite", + } + + _FLAGS = {"secure", "httponly"} + + # Added 'bad' group to catch the remaining value + _COOKIE_PATTERN = re.compile(r""" + \s* # Optional whitespace at start of cookie + (?P<key> # Start of group 'key' + [""" + _LEGAL_KEY_CHARS + r"""]+? # Any word of at least one letter + ) # End of group 'key' + ( # Optional group: there may not be a value. + \s*=\s* # Equal Sign + ( # Start of potential value + (?P<val> # Start of group 'val' + "(?:[^\\"]|\\.)*" # Any doublequoted string + | # or + \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + | # or + [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string + ) # End of group 'val' + | # or + (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values + ) # End of potential value + )? # End of optional value group + \s* # Any number of spaces. + (\s+|;|$) # Ending either at space, semicolon, or EOS. 
+ """, re.ASCII | re.VERBOSE) + + def load(self, data): + # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 + if not isinstance(data, str): + return super().load(data) + + morsel = None + for match in self._COOKIE_PATTERN.finditer(data): + if match.group('bad'): + morsel = None + continue + + key, value = match.group('key', 'val') + + is_attribute = False + if key.startswith('$'): + key = key[1:] + is_attribute = True + + lower_key = key.lower() + if lower_key in self._RESERVED: + if morsel is None: + continue + + if value is None: + if lower_key not in self._FLAGS: + morsel = None + continue + value = True + else: + value, _ = self.value_decode(value) + + morsel[key] = value + + elif is_attribute: + morsel = None + + elif value is not None: + morsel = self.get(key, http.cookies.Morsel()) + real_value, coded_value = self.value_decode(value) + morsel.set(key, real_value, coded_value) + self[key] = morsel + + else: + morsel = None + + +class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): + """ + See [1] for cookie file format. + + 1. https://curl.haxx.se/docs/http-cookies.html + """ + _HTTPONLY_PREFIX = '#HttpOnly_' + _ENTRY_LEN = 7 + _HEADER = '''# Netscape HTTP Cookie File +# This file is generated by yt-dlp. Do not edit. + +''' + _CookieFileEntry = collections.namedtuple( + 'CookieFileEntry', + ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) + + def __init__(self, filename=None, *args, **kwargs): + super().__init__(None, *args, **kwargs) + if is_path_like(filename): + filename = os.fspath(filename) + self.filename = filename + + @staticmethod + def _true_or_false(cndn): + return 'TRUE' if cndn else 'FALSE' + + @contextlib.contextmanager + def open(self, file, *, write=False): + if is_path_like(file): + with open(file, 'w' if write else 'r', encoding='utf-8') as f: + yield f + else: + if write: + file.truncate(0) + yield file + + def _really_save(self, f, ignore_discard, ignore_expires): + now = time.time() + for cookie in self: + if (not ignore_discard and cookie.discard + or not ignore_expires and cookie.is_expired(now)): + continue + name, value = cookie.name, cookie.value + if value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name, value = '', name + f.write('%s\n' % '\t'.join(( + cookie.domain, + self._true_or_false(cookie.domain.startswith('.')), + cookie.path, + self._true_or_false(cookie.secure), + str_or_none(cookie.expires, default=''), + name, value + ))) + + def save(self, filename=None, ignore_discard=True, ignore_expires=True): + """ + Save cookies to a file. 
+ Code is taken from CPython 3.6 + https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ + + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) + + # Store session cookies with `expires` set to 0 instead of an empty string + for cookie in self: + if cookie.expires is None: + cookie.expires = 0 + + with self.open(filename, write=True) as f: + f.write(self._HEADER) + self._really_save(f, ignore_discard, ignore_expires) + + def load(self, filename=None, ignore_discard=True, ignore_expires=True): + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) + + def prepare_line(line): + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + # comments and empty lines are fine + if line.startswith('#') or not line.strip(): + return line + cookie_list = line.split('\t') + if len(cookie_list) != self._ENTRY_LEN: + raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) + cookie = self._CookieFileEntry(*cookie_list) + if cookie.expires_at and not cookie.expires_at.isdigit(): + raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + return line + + cf = io.StringIO() + with self.open(filename) as f: + for line in f: + try: + cf.write(prepare_line(line)) + except http.cookiejar.LoadError as e: + if f'{line.strip()} '[0] in '[{"': + raise http.cookiejar.LoadError( + 'Cookies file must be Netscape formatted, not JSON. See ' + 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp') + write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') + continue + cf.seek(0) + self._really_load(cf, filename, ignore_discard, ignore_expires) + # Session cookies are denoted by either `expires` field set to + # an empty string or 0. MozillaCookieJar only recognizes the former + # (see [1]). So we need to force the latter to be recognized as session + # cookies on our own. + # Session cookies may be important for cookies-based authentication, + # e.g. usually, when the user does not check the 'Remember me' check box while + # logging in on a site, some important cookies are stored as session + # cookies, so failing to recognize them will result in a failed login. + # 1. 
https://bugs.python.org/issue17164 + for cookie in self: + # Treat `expires=0` cookies as session cookies + if cookie.expires == 0: + cookie.expires = None + cookie.discard = True + + def get_cookie_header(self, url): + """Generate a Cookie HTTP header for a given url""" + cookie_req = urllib.request.Request(normalize_url(sanitize_url(url))) + self.add_cookie_header(cookie_req) + return cookie_req.get_header('Cookie') + + def get_cookies_for_url(self, url): + """Generate a list of Cookie objects for a given url""" + # Policy `_now` attribute must be set before calling `_cookies_for_request` + # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360 + self._policy._now = self._now = int(time.time()) + return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url)))) + + def clear(self, *args, **kwargs): + with contextlib.suppress(KeyError): + return super().clear(*args, **kwargs) diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py new file mode 100644 index 0000000..2cfa4c9 --- /dev/null +++ b/yt_dlp/dependencies/Cryptodome.py @@ -0,0 +1,38 @@ +from ..compat.compat_utils import passthrough_module + +try: + import Cryptodome as _parent +except ImportError: + try: + import Crypto as _parent + except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python + _parent = passthrough_module(__name__, 'no_Cryptodome') + __bool__ = lambda: False + +del passthrough_module + +__version__ = '' +AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None +try: + if _parent.__name__ == 'Cryptodome': + from Cryptodome import __version__ + from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 + from Cryptodome.Hash import CMAC, SHA1 + from Cryptodome.PublicKey import RSA + elif _parent.__name__ == 'Crypto': + from Crypto import __version__ + from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 + from Crypto.Hash import CMAC, SHA1 # noqa: F401 + from Crypto.PublicKey import RSA # noqa: F401 +except ImportError: + __version__ = f'broken {__version__}'.strip() + + +_yt_dlp__identifier = _parent.__name__ +if AES and _yt_dlp__identifier == 'Crypto': + try: + # In pycrypto, mode defaults to ECB. See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + AES.new(b'abcdefghijklmnop') + except TypeError: + _yt_dlp__identifier = 'pycrypto' diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py new file mode 100644 index 0000000..9e3f907 --- /dev/null +++ b/yt_dlp/dependencies/__init__.py @@ -0,0 +1,92 @@ +# flake8: noqa: F401 +"""Imports all optional dependencies for the project. +An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace""" + +try: + import brotlicffi as brotli +except ImportError: + try: + import brotli + except ImportError: + brotli = None + + +try: + import certifi +except ImportError: + certifi = None +else: + from os.path import exists as _path_exists + + # The certificate may not be bundled in executable + if not _path_exists(certifi.where()): + certifi = None + + +try: + import mutagen +except ImportError: + mutagen = None + + +secretstorage = None +try: + import secretstorage + _SECRETSTORAGE_UNAVAILABLE_REASON = None +except ImportError: + _SECRETSTORAGE_UNAVAILABLE_REASON = ( + 'as the `secretstorage` module is not installed. 
' + 'Please install by running `python3 -m pip install secretstorage`') +except Exception as _err: + _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}' + + +try: + import sqlite3 + # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152 + sqlite3._yt_dlp__version = sqlite3.sqlite_version +except ImportError: + # although sqlite3 is part of the standard library, it is possible to compile Python without + # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 + sqlite3 = None + + +try: + import websockets +except ImportError: + websockets = None + +try: + import urllib3 +except ImportError: + urllib3 = None + +try: + import requests +except ImportError: + requests = None + +try: + import xattr # xattr or pyxattr +except ImportError: + xattr = None +else: + if hasattr(xattr, 'set'): # pyxattr + xattr._yt_dlp__identifier = 'pyxattr' + + +from . import Cryptodome + +all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} +available_dependencies = {k: v for k, v in all_dependencies.items() if v} + + +# Deprecated +Cryptodome_AES = Cryptodome.AES + + +__all__ = [ + 'all_dependencies', + 'available_dependencies', + *all_dependencies.keys(), +] diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py new file mode 100644 index 0000000..51a9f28 --- /dev/null +++ b/yt_dlp/downloader/__init__.py @@ -0,0 +1,131 @@ +from ..utils import NO_DEFAULT, determine_protocol + + +def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): + info_dict['protocol'] = determine_protocol(info_dict) + info_copy = info_dict.copy() + info_copy['to_stdout'] = to_stdout + + protocols = (protocol or info_copy['protocol']).split('+') + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols] + + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): + return FFmpegFD + elif (set(downloaders) == {DashSegmentsFD} + and not (to_stdout and len(protocols) > 1) + and set(protocols) == {'http_dash_segments_generator'}): + return DashSegmentsFD + elif len(downloaders) == 1: + return downloaders[0] + return None + + +# Some of these require get_suitable_downloader +from .common import FileDownloader +from .dash import DashSegmentsFD +from .external import FFmpegFD, get_external_downloader +from .f4m import F4mFD +from .fc2 import FC2LiveFD +from .hls import HlsFD +from .http import HttpFD +from .ism import IsmFD +from .mhtml import MhtmlFD +from .niconico import NiconicoDmcFD, NiconicoLiveFD +from .rtmp import RtmpFD +from .rtsp import RtspFD +from .websocket import WebSocketFragmentFD +from .youtube_live_chat import YoutubeLiveChatFD + +PROTOCOL_MAP = { + 'rtmp': RtmpFD, + 'rtmpe': RtmpFD, + 'rtmp_ffmpeg': FFmpegFD, + 'm3u8_native': HlsFD, + 'm3u8': FFmpegFD, + 'mms': RtspFD, + 'rtsp': RtspFD, + 'f4m': F4mFD, + 'http_dash_segments': DashSegmentsFD, + 'http_dash_segments_generator': DashSegmentsFD, + 'ism': IsmFD, + 'mhtml': MhtmlFD, + 'niconico_dmc': NiconicoDmcFD, + 'niconico_live': NiconicoLiveFD, + 'fc2_live': FC2LiveFD, + 'websocket_frag': WebSocketFragmentFD, + 'youtube_live_chat': YoutubeLiveChatFD, + 'youtube_live_chat_replay': YoutubeLiveChatFD, +} + + +def shorten_protocol_name(proto, simplify=False): + short_protocol_names = { + 'm3u8_native': 'm3u8', + 'm3u8': 'm3u8F', + 'rtmp_ffmpeg': 'rtmpF', + 'http_dash_segments': 'dash', + 'http_dash_segments_generator': 
'dashG', + 'niconico_dmc': 'dmc', + 'websocket_frag': 'WSfrag', + } + if simplify: + short_protocol_names.update({ + 'https': 'http', + 'ftps': 'ftp', + 'm3u8': 'm3u8', # Reverse above m3u8 mapping + 'm3u8_native': 'm3u8', + 'http_dash_segments_generator': 'dash', + 'rtmp_ffmpeg': 'rtmp', + 'm3u8_frag_urls': 'm3u8', + 'dash_frag_urls': 'dash', + }) + return short_protocol_names.get(proto, proto) + + +def _get_suitable_downloader(info_dict, protocol, params, default): + """Get the downloader class that can handle the info dict.""" + if default is NO_DEFAULT: + default = HttpFD + + if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict): + return FFmpegFD + + info_dict['protocol'] = protocol + downloaders = params.get('external_downloader') + external_downloader = ( + downloaders if isinstance(downloaders, str) or downloaders is None + else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))) + + if external_downloader is None: + if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): + return FFmpegFD + elif external_downloader.lower() != 'native': + ed = get_external_downloader(external_downloader) + if ed.can_download(info_dict, external_downloader): + return ed + + if protocol == 'http_dash_segments': + if info_dict.get('is_live') and (external_downloader or '').lower() != 'native': + return FFmpegFD + + if protocol in ('m3u8', 'm3u8_native'): + if info_dict.get('is_live'): + return FFmpegFD + elif (external_downloader or '').lower() == 'native': + return HlsFD + elif protocol == 'm3u8_native' and get_suitable_downloader( + info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): + return HlsFD + elif params.get('hls_prefer_native') is True: + return HlsFD + elif params.get('hls_prefer_native') is False: + return FFmpegFD + + return PROTOCOL_MAP.get(protocol, default) + + +__all__ = [ + 'FileDownloader', + 'get_suitable_downloader', + 'shorten_protocol_name', +] diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py new file mode 100644 index 0000000..b71d7ee --- /dev/null +++ b/yt_dlp/downloader/common.py @@ -0,0 +1,486 @@ +import contextlib +import errno +import functools +import os +import random +import re +import time + +from ..minicurses import ( + BreaklineStatusPrinter, + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, +) +from ..utils import ( + IDENTITY, + NO_DEFAULT, + LockingUnsupportedError, + Namespace, + RetryManager, + classproperty, + decodeArgument, + deprecation_warning, + encodeFilename, + format_bytes, + join_nonempty, + parse_bytes, + remove_start, + sanitize_open, + shell_quote, + timeconvert, + timetuple_from_msec, + try_call, +) + + +class FileDownloader: + """File Downloader class. + + File downloader objects are the ones responsible for downloading the + actual video file and writing it to disk. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. + + Available options: + + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + ratelimit: Download speed limit, in bytes/sec. + throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) + retries: Number of times to retry for expected network errors. 
+ Default is 0 for API, but 10 for CLI + file_access_retries: Number of times to retry on file access error (default: 3) + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size + xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. + external_downloader_args: A dictionary of downloader keys (in lower case) + and a list of additional command-line arguments for the + executable. Use 'default' as the name for arguments to be + passed to all downloaders. For compatibility with youtube-dl, + a single list of args can also be used + hls_use_mpegts: Use the mpegts container for HLS videos. + http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be + useful for bypassing bandwidth throttling imposed by + a webserver (experimental) + progress_template: See YoutubeDL.py + retry_sleep_functions: See YoutubeDL.py + + Subclasses of this one must re-define the real_download method. + """ + + _TEST_FILE_SIZE = 10241 + params = None + + def __init__(self, ydl, params): + """Create a FileDownloader object with the given options.""" + self._set_ydl(ydl) + self._progress_hooks = [] + self.params = params + self._prepare_multiline_status() + self.add_progress_hook(self.report_progress) + + def _set_ydl(self, ydl): + self.ydl = ydl + + for func in ( + 'deprecation_warning', + 'deprecated_feature', + 'report_error', + 'report_file_already_downloaded', + 'report_warning', + 'to_console_title', + 'to_stderr', + 'trouble', + 'write_debug', + ): + if not hasattr(self, func): + setattr(self, func, getattr(ydl, func)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) + + __to_screen = to_screen + + @classproperty + def FD_NAME(cls): + return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower() + + @staticmethod + def format_seconds(seconds): + if seconds is None: + return ' Unknown' + time = timetuple_from_msec(seconds * 1000) + if time.hours > 99: + return '--:--:--' + return '%02d:%02d:%02d' % time[:-1] + + @classmethod + def format_eta(cls, seconds): + return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}' + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return None + return float(byte_counter) / float(data_len) * 100.0 + + @staticmethod + def format_percent(percent): + return ' N/A%' if percent is None else f'{percent:>5.1f}%' + + @classmethod + def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT): + if total is NO_DEFAULT: + rate, remaining = start_or_rate, now_or_remaining + if None in (rate, remaining): + return None + return int(float(remaining) / rate) + + start, now = start_or_rate, now_or_remaining + if total is None: + return None + if now is None: + now = time.time() + rate = cls.calc_speed(start, now, current) + return rate and int((float(total) - float(current)) / rate) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return None + return float(bytes) / dif + + @staticmethod + def format_speed(speed): + return ' 
Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s' + + @staticmethod + def format_retries(retries): + return 'inf' if retries == float('inf') else int(retries) + + @staticmethod + def filesize_or_none(unencoded_filename): + if os.path.isfile(unencoded_filename): + return os.path.getsize(unencoded_filename) + return 0 + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return int(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return int(new_max) + if rate < new_min: + return int(new_min) + return int(rate) + + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into an integer.""" + deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and ' + 'may be removed in the future. Use yt_dlp.utils.parse_bytes instead') + return parse_bytes(bytestr) + + def slow_down(self, start_time, now, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self.params.get('ratelimit') + if rate_limit is None or byte_counter == 0: + return + if now is None: + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + sleep_time = float(byte_counter) / rate_limit - elapsed + if sleep_time > 0: + time.sleep(sleep_time) + + def temp_name(self, filename): + """Returns a temporary filename for the given filename.""" + if self.params.get('nopart', False) or filename == '-' or \ + (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): + return filename + return filename + '.part' + + def undo_temp_name(self, filename): + if filename.endswith('.part'): + return filename[:-len('.part')] + return filename + + def ytdl_filename(self, filename): + return filename + '.ytdl' + + def wrap_file_access(action, *, fatal=False): + def error_callback(err, count, retries, *, fd): + return RetryManager.report_retry( + err, count, retries, info=fd.__to_screen, + warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')), + error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'), + sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access')) + + def wrapper(self, func, *args, **kwargs): + for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self): + try: + return func(self, *args, **kwargs) + except OSError as err: + if err.errno in (errno.EACCES, errno.EINVAL): + retry.error = err + continue + retry.error_callback(err, 1, 0) + + return functools.partial(functools.partialmethod, wrapper) + + @wrap_file_access('open', fatal=True) + def sanitize_open(self, filename, open_mode): + f, filename = sanitize_open(filename, open_mode) + if not getattr(f, 'locked', None): + self.write_debug(f'{LockingUnsupportedError.msg}. 
Proceeding without locking', only_once=True) + return f, filename + + @wrap_file_access('remove') + def try_remove(self, filename): + if os.path.isfile(filename): + os.remove(filename) + + @wrap_file_access('rename') + def try_rename(self, old_filename, new_filename): + if old_filename == new_filename: + return + os.replace(old_filename, new_filename) + + def try_utime(self, filename, last_modified_hdr): + """Try to set the last-modified time of the given file.""" + if last_modified_hdr is None: + return + if not os.path.isfile(encodeFilename(filename)): + return + timestr = last_modified_hdr + if timestr is None: + return + filetime = timeconvert(timestr) + if filetime is None: + return filetime + # Ignore obviously invalid dates + if filetime == 0: + return + with contextlib.suppress(Exception): + os.utime(filename, (time.time(), filetime)) + return filetime + + def report_destination(self, filename): + """Report destination filename.""" + self.to_screen('[download] Destination: ' + filename) + + def _prepare_multiline_status(self, lines=1): + if self.params.get('noprogress'): + self._multiline = QuietMultilinePrinter() + elif self.ydl.params.get('logger'): + self._multiline = MultilineLogger(self.ydl.params['logger'], lines) + elif self.params.get('progress_with_newline'): + self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines) + else: + self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet')) + self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color' + self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out + + def _finish_multiline_status(self): + self._multiline.end() + + ProgressStyles = Namespace( + downloaded_bytes='light blue', + percent='light blue', + eta='yellow', + speed='green', + elapsed='bold white', + total_bytes='', + total_bytes_estimate='', + ) + + def _report_progress_status(self, s, default_template): + for name, style in self.ProgressStyles.items_: + name = f'_{name}_str' + if name not in s: + continue + s[name] = self._format_progress(s[name], style) + s['_default_template'] = default_template % s + + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} + + progress_template = self.params.get('progress_template', {}) + self._multiline.print_at_line(self.ydl.evaluate_outtmpl( + progress_template.get('download') or '[download] %(progress._default_template)s', + progress_dict), s.get('progress_idx') or 0) + self.to_console_title(self.ydl.evaluate_outtmpl( + progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', + progress_dict)) + + def _format_progress(self, *args, **kwargs): + return self.ydl._format_text( + self._multiline.stream, self._multiline.allow_colors, *args, **kwargs) + + def report_progress(self, s): + def with_fields(*tups, default=''): + for *fields, tmpl in tups: + if all(s.get(f) is not None for f in fields): + return tmpl + return default + + _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}' + + if s['status'] == 'finished': + if self.params.get('noprogress'): + self.to_screen('[download] Download completed') + speed = try_call(lambda: s['total_bytes'] / s['elapsed']) + s.update({ + 'speed': speed, + '_speed_str': self.format_speed(speed).strip(), + '_total_bytes_str': _format_bytes('total_bytes'), + '_elapsed_str': self.format_seconds(s.get('elapsed')), + '_percent_str': self.format_percent(100), + }) + 
self._report_progress_status(s, join_nonempty( + '100%%', + with_fields(('total_bytes', 'of %(_total_bytes_str)s')), + with_fields(('elapsed', 'in %(_elapsed_str)s')), + with_fields(('speed', 'at %(_speed_str)s')), + delim=' ')) + + if s['status'] != 'downloading': + return + + s.update({ + '_eta_str': self.format_eta(s.get('eta')).strip(), + '_speed_str': self.format_speed(s.get('speed')), + '_percent_str': self.format_percent(try_call( + lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], + lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], + lambda: s['downloaded_bytes'] == 0 and 0)), + '_total_bytes_str': _format_bytes('total_bytes'), + '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), + '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), + '_elapsed_str': self.format_seconds(s.get('elapsed')), + }) + + msg_template = with_fields( + ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'), + ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'), + ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'), + ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'), + default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s') + + msg_template += with_fields( + ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'), + ('fragment_index', ' (frag %(fragment_index)s)')) + self._report_progress_status(s, msg_template) + + def report_resuming_byte(self, resume_len): + """Report attempt to resume at given byte.""" + self.to_screen('[download] Resuming download at byte %s' % resume_len) + + def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): + """Report retry""" + is_frag = False if frag_index is NO_DEFAULT else 'fragment' + RetryManager.report_retry( + err, count, retries, info=self.__to_screen, + warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'), + error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'), + sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'), + suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None) + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_screen('[download] Unable to resume') + + @staticmethod + def supports_manifest(manifest): + """ Whether the downloader can download the fragments from the manifest. + Redefine in subclasses if needed. 
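A subclass that cannot handle certain manifests may redefine this to reject them; a minimal sketch in the spirit of Aria2cFD.supports_manifest further down in this patch:

    @staticmethod
    def supports_manifest(manifest):
        # e.g. refuse HLS playlists built from byte ranges
        return '#EXT-X-BYTERANGE' not in manifest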
""" + pass + + def download(self, filename, info_dict, subtitle=False): + """Download to a filename using the info from info_dict + Return True on success and False otherwise + """ + nooverwrites_and_exists = ( + not self.params.get('overwrites', True) + and os.path.exists(encodeFilename(filename)) + ) + + if not hasattr(filename, 'write'): + continuedl_and_exists = ( + self.params.get('continuedl', True) + and os.path.isfile(encodeFilename(filename)) + and not self.params.get('nopart', False) + ) + + # Check file already present + if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): + self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }, info_dict) + self._finish_multiline_status() + return True, False + + if subtitle: + sleep_interval = self.params.get('sleep_interval_subtitles') or 0 + else: + min_sleep_interval = self.params.get('sleep_interval') or 0 + sleep_interval = random.uniform( + min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) + if sleep_interval > 0: + self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') + time.sleep(sleep_interval) + + ret = self.real_download(filename, info_dict) + self._finish_multiline_status() + return ret, True + + def real_download(self, filename, info_dict): + """Real download process. Redefine in subclasses.""" + raise NotImplementedError('This method must be implemented by subclasses') + + def _hook_progress(self, status, info_dict): + # Ideally we want to make a copy of the dict, but that is too slow + status['info_dict'] = info_dict + # youtube-dl passes the same status object to all the hooks. + # Some third party scripts seems to be relying on this. + # So keep this behavior if possible + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + # See YoutubeDl.py (search for progress_hooks) for a description of + # this interface + self._progress_hooks.append(ph) + + def _debug_cmd(self, args, exe=None): + if not self.params.get('verbose', False): + return + + str_args = [decodeArgument(a) for a in args] + + if exe is None: + exe = os.path.basename(str_args[0]) + + self.write_debug(f'{exe} command line: {shell_quote(str_args)}') diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py new file mode 100644 index 0000000..afc79b6 --- /dev/null +++ b/yt_dlp/downloader/dash.py @@ -0,0 +1,90 @@ +import time +import urllib.parse + +from . import get_suitable_downloader +from .fragment import FragmentFD +from ..utils import update_url_query, urljoin + + +class DashSegmentsFD(FragmentFD): + """ + Download segments in a DASH manifest. 
External downloaders can take over + the fragment downloads by supporting the 'dash_frag_urls' protocol + """ + + FD_NAME = 'dashsegments' + + def real_download(self, filename, info_dict): + if 'http_dash_segments_generator' in info_dict['protocol'].split('+'): + real_downloader = None # No external FD can support --live-from-start + else: + if info_dict.get('is_live'): + self.report_error('Live DASH videos are not supported') + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) + + real_start = time.time() + + requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] + args = [] + for fmt in requested_formats or [info_dict]: + try: + fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) + except TypeError: + fragment_count = None + ctx = { + 'filename': fmt.get('filepath') or filename, + 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'), + 'total_frags': fragment_count, + } + + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, fmt) + ctx['start'] = real_start + + extra_query = None + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + + fragments_to_download = self._get_fragments(fmt, ctx, extra_query) + + if real_downloader: + self.to_screen( + f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') + info_dict['fragments'] = list(fragments_to_download) + fd = real_downloader(self.ydl, self.params) + return fd.real_download(filename, info_dict) + + args.append([ctx, fragments_to_download, fmt]) + + return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0) + + def _resolve_fragments(self, fragments, ctx): + fragments = fragments(ctx) if callable(fragments) else fragments + return [next(iter(fragments))] if self.params.get('test') else fragments + + def _get_fragments(self, fmt, ctx, extra_query): + fragment_base_url = fmt.get('fragment_base_url') + fragments = self._resolve_fragments(fmt['fragments'], ctx) + + frag_index = 0 + for i, fragment in enumerate(fragments): + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + if extra_query: + fragment_url = update_url_query(fragment_url, extra_query) + + yield { + 'frag_index': frag_index, + 'fragment_count': fragment.get('fragment_count'), + 'index': i, + 'url': fragment_url, + } diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py new file mode 100644 index 0000000..ce5eeb0 --- /dev/null +++ b/yt_dlp/downloader/external.py @@ -0,0 +1,664 @@ +import enum +import json +import os +import re +import subprocess +import sys +import tempfile +import time +import uuid + +from .fragment import FragmentFD +from ..compat import functools +from ..networking import Request +from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor +from ..utils import ( + Popen, + RetryManager, + _configuration_args, + check_executable, + classproperty, + cli_bool_option, + cli_option, + cli_valueless_option, + determine_ext, + encodeArgument, + encodeFilename, + find_available_port, + remove_end, + traverse_obj, +) + + +class Features(enum.Enum): + 
TO_STDOUT = enum.auto() + MULTIPLE_FORMATS = enum.auto() + + +class ExternalFD(FragmentFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') + SUPPORTED_FEATURES = () + _CAPTURE_STDERR = True + + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + self._cookies_tempfile = None + + try: + started = time.time() + retval = self._call_downloader(tmpfilename, info_dict) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + # Live stream downloading cancellation should be considered as + # correct and expected termination thus all postprocessing + # should take place + retval = 0 + self.to_screen('[%s] Interrupted by user' % self.get_basename()) + finally: + if self._cookies_tempfile: + self.try_remove(self._cookies_tempfile) + + if retval == 0: + status = { + 'filename': filename, + 'status': 'finished', + 'elapsed': time.time() - started, + } + if filename != '-': + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.try_rename(tmpfilename, filename) + status.update({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + }) + self._hook_progress(status, info_dict) + return True + else: + self.to_stderr('\n') + self.report_error('%s exited with code %d' % ( + self.get_basename(), retval)) + return False + + @classmethod + def get_basename(cls): + return cls.__name__[:-2].lower() + + @classproperty + def EXE_NAME(cls): + return cls.get_basename() + + @functools.cached_property + def exe(self): + return self.EXE_NAME + + @classmethod + def available(cls, path=None): + path = check_executable( + cls.EXE_NAME if path in (None, cls.get_basename()) else path, + [cls.AVAILABLE_OPT]) + if not path: + return False + cls.exe = path + return path + + @classmethod + def supports(cls, info_dict): + return all(( + not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, + '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), + all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), + )) + + @classmethod + def can_download(cls, info_dict, path=None): + return cls.available(path) and cls.supports(info_dict) + + def _option(self, command_option, param): + return cli_option(self.params, command_option, param) + + def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None): + return cli_bool_option(self.params, command_option, param, true_value, false_value, separator) + + def _valueless_option(self, command_option, param, expected_value=True): + return cli_valueless_option(self.params, command_option, param, expected_value) + + def _configuration_args(self, keys=None, *args, **kwargs): + return _configuration_args( + self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, + keys, *args, **kwargs) + + def _write_cookies(self): + if not self.ydl.cookiejar.filename: + tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False) + tmp_cookies.close() + self._cookies_tempfile = tmp_cookies.name + self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"') + # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename + self.ydl.cookiejar.save(self._cookies_tempfile) + return self.ydl.cookiejar.filename or self._cookies_tempfile + + def _call_downloader(self, tmpfilename, info_dict): + """ Either 
override this or implement _make_cmd """
+        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+        self._debug_cmd(cmd)
+
+        if 'fragments' not in info_dict:
+            _, stderr, returncode = self._call_process(cmd, info_dict)
+            if returncode and stderr:
+                self.to_stderr(stderr)
+            return returncode
+
+        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+        retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+                                     frag_index=None, fatal=not skip_unavailable_fragments)
+        for retry in retry_manager:
+            _, stderr, returncode = self._call_process(cmd, info_dict)
+            if not returncode:
+                break
+            # TODO: Decide whether to retry based on error code
+            # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
+            if stderr:
+                self.to_stderr(stderr)
+            retry.error = Exception()
+            continue
+        if not skip_unavailable_fragments and retry_manager.error:
+            return -1
+
+        decrypt_fragment = self.decrypter(info_dict)
+        dest, _ = self.sanitize_open(tmpfilename, 'wb')
+        for frag_index, fragment in enumerate(info_dict['fragments']):
+            fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
+            try:
+                src, _ = self.sanitize_open(fragment_filename, 'rb')
+            except OSError as err:
+                if skip_unavailable_fragments and frag_index > 1:
+                    self.report_skip_fragment(frag_index, err)
+                    continue
+                self.report_error(f'Unable to open fragment {frag_index}; {err}')
+                return -1
+            dest.write(decrypt_fragment(fragment, src.read()))
+            src.close()
+            if not self.params.get('keep_fragments', False):
+                self.try_remove(encodeFilename(fragment_filename))
+        dest.close()
+        self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
+        return 0
+
+    def _call_process(self, cmd, info_dict):
+        return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
+
+
+class CurlFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+    _CAPTURE_STDERR = False  # curl writes the progress to stderr
+
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['--cookie', cookie_header]
+        if info_dict.get('http_headers') is not None:
+            for key, val in info_dict['http_headers'].items():
+                cmd += ['--header', f'{key}: {val}']
+
+        cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
+        cmd += self._valueless_option('--silent', 'noprogress')
+        cmd += self._valueless_option('--verbose', 'verbose')
+        cmd += self._option('--limit-rate', 'ratelimit')
+        retry = self._option('--retry', 'retries')
+        if len(retry) == 2:
+            if retry[1] in ('inf', 'infinite'):
+                retry[1] = '2147483647'
+            cmd += retry
+        cmd += self._option('--max-filesize', 'max_filesize')
+        cmd += self._option('--interface', 'source_address')
+        cmd += self._option('--proxy', 'proxy')
+        cmd += self._valueless_option('--insecure', 'nocheckcertificate')
+        cmd += self._configuration_args()
+        cmd += ['--', info_dict['url']]
+        return cmd
+
+
+class AxelFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [self.exe, '-o', tmpfilename]
+        if info_dict.get('http_headers') is not None:
+            for key, val in info_dict['http_headers'].items():
+                cmd += ['-H', f'{key}: {val}']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
+        cmd += self._configuration_args()
+        cmd += ['--', info_dict['url']]
+        return cmd
+
+
+class WgetFD(ExternalFD):
+    AVAILABLE_OPT = '--version'
+
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += ['--load-cookies', self._write_cookies()]
+        if info_dict.get('http_headers') is not None:
+            for key, val in info_dict['http_headers'].items():
+                cmd += ['--header', f'{key}: {val}']
+        cmd += self._option('--limit-rate', 'ratelimit')
+        retry = self._option('--tries', 'retries')
+        if len(retry) == 2:
+            if retry[1] in ('inf', 'infinite'):
+                retry[1] = '0'
+            cmd += retry
+        cmd += self._option('--bind-address', 'source_address')
+        proxy = self.params.get('proxy')
+        if proxy:
+            for var in ('http_proxy', 'https_proxy'):
+                cmd += ['--execute', f'{var}={proxy}']
+        cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
+        cmd += self._configuration_args()
+        cmd += ['--', info_dict['url']]
+        return cmd
+
+
+class Aria2cFD(ExternalFD):
+    AVAILABLE_OPT = '-v'
+    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
+
+    @staticmethod
+    def supports_manifest(manifest):
+        UNSUPPORTED_FEATURES = [
+            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [1]
+            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+        ]
+        check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+        return all(check_results)
+
+    @staticmethod
+    def _aria2c_filename(fn):
+        return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
+
+    def _call_downloader(self, tmpfilename, info_dict):
+        # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931
+        if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []):
+            info_dict['__rpc'] = {
+                'port': find_available_port() or 19190,
+                'secret': str(uuid.uuid4()),
+            }
+        return super()._call_downloader(tmpfilename, info_dict)
+
+    def _make_cmd(self, tmpfilename, info_dict):
+        cmd = [self.exe, '-c', '--no-conf',
+               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
+        if 'fragments' in info_dict:
+            cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+        else:
+            cmd += ['--min-split-size', '1M']
+
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += [f'--load-cookies={self._write_cookies()}']
+        if info_dict.get('http_headers') is not None:
+            for key, val in info_dict['http_headers'].items():
+                cmd += ['--header', f'{key}: {val}']
+        cmd += self._option('--max-overall-download-limit', 'ratelimit')
+        cmd += self._option('--interface', 'source_address')
+        cmd += self._option('--all-proxy', 'proxy')
+        cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
+        cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
+        cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
+        cmd += self._configuration_args()
+
+        if '__rpc' in info_dict:
+            cmd += [
+                '--enable-rpc',
+                f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
+                f'--rpc-secret={info_dict["__rpc"]["secret"]}']
+
+        # aria2c strips out spaces from the beginning/end of filenames and paths.
+        # We work around this issue by adding a "./" to the beginning of the
+        # filename and relative path, and adding a "/" at the end of the path.
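+        # For example, _aria2c_filename(' frag.part') yields './ frag.part'
+        # (on POSIX; '.\ frag.part' on Windows), preserving the leading space,
+        # while absolute paths are passed through unchanged.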
+ # See: https://github.com/yt-dlp/yt-dlp/issues/276 + # https://github.com/ytdl-org/youtube-dl/issues/20312 + # https://github.com/aria2/aria2/issues/1373 + dn = os.path.dirname(tmpfilename) + if dn: + cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep] + if 'fragments' not in info_dict: + cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))] + cmd += ['--auto-file-renaming=false'] + + if 'fragments' in info_dict: + cmd += ['--uri-selector=inorder'] + url_list_file = '%s.frag.urls' % tmpfilename + url_list = [] + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) + url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) + stream, _ = self.sanitize_open(url_list_file, 'wb') + stream.write('\n'.join(url_list).encode()) + stream.close() + cmd += ['-i', self._aria2c_filename(url_list_file)] + else: + cmd += ['--', info_dict['url']] + return cmd + + def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): + # Does not actually need to be UUID, just unique + sanitycheck = str(uuid.uuid4()) + d = json.dumps({ + 'jsonrpc': '2.0', + 'id': sanitycheck, + 'method': method, + 'params': [f'token:{rpc_secret}', *params], + }).encode('utf-8') + request = Request( + f'http://localhost:{rpc_port}/jsonrpc', + data=d, headers={ + 'Content-Type': 'application/json', + 'Content-Length': f'{len(d)}', + }, proxies={'all': None}) + with self.ydl.urlopen(request) as r: + resp = json.load(r) + assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' + return resp['result'] + + def _call_process(self, cmd, info_dict): + if '__rpc' not in info_dict: + return super()._call_process(cmd, info_dict) + + send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) + started = time.time() + + fragmented = 'fragments' in info_dict + frag_count = len(info_dict['fragments']) if fragmented else 1 + status = { + 'filename': info_dict.get('_filename'), + 'status': 'downloading', + 'elapsed': 0, + 'downloaded_bytes': 0, + 'fragment_count': frag_count if fragmented else None, + 'fragment_index': 0 if fragmented else None, + } + self._hook_progress(status, info_dict) + + def get_stat(key, *obj, average=False): + val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] + return sum(val) / (len(val) if average else 1) + + with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: + # Add a small sleep so that RPC client can receive response, + # or the connection stalls infinitely + time.sleep(0.2) + retval = p.poll() + while retval is None: + # We don't use tellStatus as we won't know the GID without reading stdout + # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive + active = send_rpc('aria2.tellActive') + completed = send_rpc('aria2.tellStopped', [0, frag_count]) + + downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) + speed = get_stat('downloadSpeed', active) + total = frag_count * get_stat('totalLength', active, completed, average=True) + if total < downloaded: + total = None + + status.update({ + 'downloaded_bytes': int(downloaded), + 'speed': speed, + 'total_bytes': None if fragmented else total, + 'total_bytes_estimate': total, + 'eta': (total - downloaded) / (speed or 1), + 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, + 'elapsed': time.time() - started + }) + 
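+                # The snapshot above is fed through the regular progress hooks;
+                # polling continues until aria2c reports every fragment as
+                # stopped, at which point the RPC server is shut down below.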
self._hook_progress(status, info_dict) + + if not active and len(completed) >= frag_count: + send_rpc('aria2.shutdown') + retval = p.wait() + break + + time.sleep(0.1) + retval = p.poll() + + return '', p.stderr.read(), retval + + +class HttpieFD(ExternalFD): + AVAILABLE_OPT = '--version' + EXE_NAME = 'http' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += [f'{key}:{val}'] + + # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1] + # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2] + # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq + # 2: https://httpie.io/docs/cli/sessions + cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) + if cookie_header: + cmd += [f'Cookie:{cookie_header}'] + return cmd + + +class FFmpegFD(ExternalFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') + SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS) + + @classmethod + def available(cls, path=None): + # TODO: Fix path for ffmpeg + # Fixme: This may be wrong when --ffmpeg-location is used + return FFmpegPostProcessor().available + + def on_process_started(self, proc, stdin): + """ Override this in subclasses """ + pass + + @classmethod + def can_merge_formats(cls, info_dict, params): + return ( + info_dict.get('requested_formats') + and info_dict.get('protocol') + and not params.get('allow_unplayable_formats') + and 'no-direct-merge' not in params.get('compat_opts', []) + and cls.can_download(info_dict)) + + def _call_downloader(self, tmpfilename, info_dict): + ffpp = FFmpegPostProcessor(downloader=self) + if not ffpp.available: + self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') + return False + ffpp.check_version() + + args = [ffpp.executable, '-y'] + + for log_level in ('quiet', 'verbose'): + if self.params.get(log_level, False): + args += ['-loglevel', log_level] + break + if not self.params.get('verbose'): + args += ['-hide_banner'] + + args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[]) + + # These exists only for compatibility. Extractors should use + # info_dict['downloader_options']['ffmpeg_args'] instead + args += info_dict.get('_ffmpeg_args') or [] + seekable = info_dict.get('_seekable') + if seekable is not None: + # setting -seekable prevents ffmpeg from guessing if the server + # supports seeking(by adding the header `Range: bytes=0-`), which + # can cause problems in some cases + # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 + # http://trac.ffmpeg.org/ticket/6125#comment:10 + args += ['-seekable', '1' if seekable else '0'] + + env = None + proxy = self.params.get('proxy') + if proxy: + if not re.match(r'^[\da-zA-Z]+://', proxy): + proxy = 'http://%s' % proxy + + if proxy.startswith('socks'): + self.report_warning( + '%s does not support SOCKS proxies. Downloading is likely to fail. ' + 'Consider adding --hls-prefer-native to your command.' 
% self.get_basename()) + + # Since December 2015 ffmpeg supports -http_proxy option (see + # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) + # We could switch to the following code if we are able to detect version properly + # args += ['-http_proxy', proxy] + env = os.environ.copy() + env['HTTP_PROXY'] = proxy + env['http_proxy'] = proxy + + protocol = info_dict.get('protocol') + + if protocol == 'rtmp': + player_url = info_dict.get('player_url') + page_url = info_dict.get('page_url') + app = info_dict.get('app') + play_path = info_dict.get('play_path') + tc_url = info_dict.get('tc_url') + flash_version = info_dict.get('flash_version') + live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn') + if player_url is not None: + args += ['-rtmp_swfverify', player_url] + if page_url is not None: + args += ['-rtmp_pageurl', page_url] + if app is not None: + args += ['-rtmp_app', app] + if play_path is not None: + args += ['-rtmp_playpath', play_path] + if tc_url is not None: + args += ['-rtmp_tcurl', tc_url] + if flash_version is not None: + args += ['-rtmp_flashver', flash_version] + if live: + args += ['-rtmp_live', 'live'] + if isinstance(conn, list): + for entry in conn: + args += ['-rtmp_conn', entry] + elif isinstance(conn, str): + args += ['-rtmp_conn', conn] + + start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end') + + selected_formats = info_dict.get('requested_formats') or [info_dict] + for i, fmt in enumerate(selected_formats): + is_http = re.match(r'^https?://', fmt['url']) + cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] + if cookies: + args.extend(['-cookies', ''.join( + f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n' + for cookie in cookies)]) + if fmt.get('http_headers') and is_http: + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. 
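+                # e.g. the joined value becomes a single argument of the form
+                # 'Referer: https://example.com/\r\nUser-Agent: ...\r\n'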
+                args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
+
+            if start_time:
+                args += ['-ss', str(start_time)]
+            if end_time:
+                args += ['-t', str(end_time - start_time)]
+
+            args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
+
+        if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
+            args += ['-c', 'copy']
+
+        if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
+            for i, fmt in enumerate(selected_formats):
+                stream_number = fmt.get('manifest_stream_number', 0)
+                args.extend(['-map', f'{i}:{stream_number}'])
+
+        if self.params.get('test', False):
+            args += ['-fs', str(self._TEST_FILE_SIZE)]
+
+        ext = info_dict['ext']
+        if protocol in ('m3u8', 'm3u8_native'):
+            use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
+            if use_mpegts is None:
+                use_mpegts = info_dict.get('is_live')
+            if use_mpegts:
+                args += ['-f', 'mpegts']
+            else:
+                args += ['-f', 'mp4']
+                if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+                    args += ['-bsf:a', 'aac_adtstoasc']
+        elif protocol == 'rtmp':
+            args += ['-f', 'flv']
+        elif ext == 'mp4' and tmpfilename == '-':
+            args += ['-f', 'mpegts']
+        elif ext == 'unknown_video':
+            ext = determine_ext(remove_end(tmpfilename, '.part'))
+            if ext == 'unknown_video':
+                self.report_warning(
+                    'The video format is unknown and cannot be downloaded by ffmpeg. '
+                    'Explicitly set the extension in the filename to attempt download in that format')
+            else:
+                self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
+                args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+        else:
+            args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+
+        args += self._configuration_args(('_o1', '_o', ''))
+
+        args = [encodeArgument(opt) for opt in args]
+        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+        self._debug_cmd(args)
+
+        piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
+        with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
+            if piped:
+                self.on_process_started(proc, proc.stdin)
+            try:
+                retval = proc.wait()
+            except BaseException as e:
+                # subprocess.run would send the SIGKILL signal to ffmpeg and the
+                # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+                # produces a file that is playable (this is mostly useful for live
+                # streams). Note that Windows is not affected and produces playable
+                # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
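+                # 'q' is ffmpeg's interactive quit command; writing it to stdin
+                # lets ffmpeg finalize the output container instead of being killed.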
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped: + proc.communicate_or_kill(b'q') + else: + proc.kill(timeout=None) + raise + return retval + + +class AVconvFD(FFmpegFD): + pass + + +_BY_NAME = { + klass.get_basename(): klass + for name, klass in globals().items() + if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') +} + + +def list_external_downloaders(): + return sorted(_BY_NAME.keys()) + + +def get_external_downloader(external_downloader): + """ Given the name of the executable, see whether we support the given downloader """ + bn = os.path.splitext(os.path.basename(external_downloader))[0] + return _BY_NAME.get(bn) or next(( + klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn + ), None) diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py new file mode 100644 index 0000000..28cbba0 --- /dev/null +++ b/yt_dlp/downloader/f4m.py @@ -0,0 +1,427 @@ +import base64 +import io +import itertools +import struct +import time +import urllib.parse + +from .fragment import FragmentFD +from ..compat import compat_etree_fromstring +from ..networking.exceptions import HTTPError +from ..utils import fix_xml_ampersands, xpath_text + + +class DataTruncatedError(Exception): + pass + + +class FlvReader(io.BytesIO): + """ + Reader for Flv files + The file format is documented in https://www.adobe.com/devnet/f4v.html + """ + + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + + # Utility functions for reading numbers and strings + def read_unsigned_long_long(self): + return struct.unpack('!Q', self.read_bytes(8))[0] + + def read_unsigned_int(self): + return struct.unpack('!I', self.read_bytes(4))[0] + + def read_unsigned_char(self): + return struct.unpack('!B', self.read_bytes(1))[0] + + def read_string(self): + res = b'' + while True: + char = self.read_bytes(1) + if char == b'\x00': + break + res += char + return res + + def read_box_info(self): + """ + Read a box and return the info as a tuple: (box_size, box_type, box_data) + """ + real_size = size = self.read_unsigned_int() + box_type = self.read_bytes(4) + header_end = 8 + if size == 1: + real_size = self.read_unsigned_long_long() + header_end = 16 + return real_size, box_type, self.read_bytes(real_size - header_end) + + def read_asrt(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + quality_entry_count = self.read_unsigned_char() + # QualityEntryCount + for i in range(quality_entry_count): + self.read_string() + + segment_run_count = self.read_unsigned_int() + segments = [] + for i in range(segment_run_count): + first_segment = self.read_unsigned_int() + fragments_per_segment = self.read_unsigned_int() + segments.append((first_segment, fragments_per_segment)) + + return { + 'segment_run': segments, + } + + def read_afrt(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + # time scale + self.read_unsigned_int() + + quality_entry_count = self.read_unsigned_char() + # QualitySegmentUrlModifiers + for i in range(quality_entry_count): + self.read_string() + + fragments_count = self.read_unsigned_int() + fragments = [] + for i in range(fragments_count): + first = self.read_unsigned_int() + first_ts = self.read_unsigned_long_long() + duration = self.read_unsigned_int() + if duration == 0: + discontinuity_indicator = self.read_unsigned_char() + else: + discontinuity_indicator = None + 
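+            # Per the fragment run table ('afrt') layout, a zero-duration entry
+            # is followed by a one-byte discontinuity indicator, read above.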
fragments.append({ + 'first': first, + 'ts': first_ts, + 'duration': duration, + 'discontinuity_indicator': discontinuity_indicator, + }) + + return { + 'fragments': fragments, + } + + def read_abst(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + + self.read_unsigned_int() # BootstrapinfoVersion + # Profile,Live,Update,Reserved + flags = self.read_unsigned_char() + live = flags & 0x20 != 0 + # time scale + self.read_unsigned_int() + # CurrentMediaTime + self.read_unsigned_long_long() + # SmpteTimeCodeOffset + self.read_unsigned_long_long() + + self.read_string() # MovieIdentifier + server_count = self.read_unsigned_char() + # ServerEntryTable + for i in range(server_count): + self.read_string() + quality_count = self.read_unsigned_char() + # QualityEntryTable + for i in range(quality_count): + self.read_string() + # DrmData + self.read_string() + # MetaData + self.read_string() + + segments_count = self.read_unsigned_char() + segments = [] + for i in range(segments_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'asrt' + segment = FlvReader(box_data).read_asrt() + segments.append(segment) + fragments_run_count = self.read_unsigned_char() + fragments = [] + for i in range(fragments_run_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'afrt' + fragments.append(FlvReader(box_data).read_afrt()) + + return { + 'segments': segments, + 'fragments': fragments, + 'live': live, + } + + def read_bootstrap_info(self): + total_size, box_type, box_data = self.read_box_info() + assert box_type == b'abst' + return FlvReader(box_data).read_abst() + + +def read_bootstrap_info(bootstrap_bytes): + return FlvReader(bootstrap_bytes).read_bootstrap_info() + + +def build_fragments_list(boot_info): + """ Return a list of (segment, fragment) for each fragment in the video """ + res = [] + segment_run_table = boot_info['segments'][0] + fragment_run_entry_table = boot_info['fragments'][0]['fragments'] + first_frag_number = fragment_run_entry_table[0]['first'] + fragments_counter = itertools.count(first_frag_number) + for segment, fragments_count in segment_run_table['segment_run']: + # In some live HDS streams (e.g. Rai), `fragments_count` is + # abnormal and causing out-of-memory errors. 
It's OK to change the + # number of fragments for live streams as they are updated periodically + if fragments_count == 4294967295 and boot_info['live']: + fragments_count = 2 + for _ in range(fragments_count): + res.append((segment, next(fragments_counter))) + + if boot_info['live']: + res = res[-2:] + + return res + + +def write_unsigned_int(stream, val): + stream.write(struct.pack('!I', val)) + + +def write_unsigned_int_24(stream, val): + stream.write(struct.pack('!I', val)[1:]) + + +def write_flv_header(stream): + """Writes the FLV header to stream""" + # FLV header + stream.write(b'FLV\x01') + stream.write(b'\x05') + stream.write(b'\x00\x00\x00\x09') + stream.write(b'\x00\x00\x00\x00') + + +def write_metadata_tag(stream, metadata): + """Writes optional metadata tag to stream""" + SCRIPT_TAG = b'\x12' + FLV_TAG_HEADER_LEN = 11 + + if metadata: + stream.write(SCRIPT_TAG) + write_unsigned_int_24(stream, len(metadata)) + stream.write(b'\x00\x00\x00\x00\x00\x00\x00') + stream.write(metadata) + write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) + + +def remove_encrypted_media(media): + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib + and 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + + +def _add_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url + + +class F4mFD(FragmentFD): + """ + A downloader for f4m manifests or AdobeHDS. + """ + + def _get_unencrypted_media(self, doc): + media = doc.findall(_add_ns('media')) + if not media: + self.report_error('No media found') + if not self.params.get('allow_unplayable_formats'): + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): + # If id attribute is missing it's valid for all media nodes + # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute + if 'id' not in e.attrib: + self.report_error('Missing ID in f4m DRM') + media = remove_encrypted_media(media) + if not media: + self.report_error('Unsupported DRM') + return media + + def _get_bootstrap_from_url(self, bootstrap_url): + bootstrap = self.ydl.urlopen(bootstrap_url).read() + return read_bootstrap_info(bootstrap) + + def _update_live_fragments(self, bootstrap_url, latest_fragment): + fragments_list = [] + retries = 30 + while (not fragments_list) and (retries > 0): + boot_info = self._get_bootstrap_from_url(bootstrap_url) + fragments_list = build_fragments_list(boot_info) + fragments_list = [f for f in fragments_list if f[1] > latest_fragment] + if not fragments_list: + # Retry after a while + time.sleep(5.0) + retries -= 1 + + if not fragments_list: + self.report_error('Failed to update fragments') + + return fragments_list + + def _parse_bootstrap_node(self, node, base_url): + # Sometimes non empty inline bootstrap info can be specified along + # with bootstrap url attribute (e.g. dummy inline bootstrap info + # contains whitespace characters in [1]). We will prefer bootstrap + # url over inline bootstrap info when present. + # 1. 
http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m + bootstrap_url = node.get('url') + if bootstrap_url: + bootstrap_url = urllib.parse.urljoin( + base_url, bootstrap_url) + boot_info = self._get_bootstrap_from_url(bootstrap_url) + else: + bootstrap_url = None + bootstrap = base64.b64decode(node.text) + boot_info = read_bootstrap_info(bootstrap) + return boot_info, bootstrap_url + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + requested_bitrate = info_dict.get('tbr') + self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.url + # Some manifests may be malformed, e.g. prosiebensat1 generated manifests + # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/ytdl-org/youtube-dl/issues/7823) + manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() + + doc = compat_etree_fromstring(manifest) + formats = [(int(f.attrib.get('bitrate', -1)), f) + for f in self._get_unencrypted_media(doc)] + if requested_bitrate is None or len(formats) == 1: + # get the best format + formats = sorted(formats, key=lambda f: f[0]) + rate, media = formats[-1] + else: + rate, media = list(filter( + lambda f: int(f[0]) == requested_bitrate, formats))[0] + + # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. + man_base_url = get_base_url(doc) or man_url + + base_url = urllib.parse.urljoin(man_base_url, media.attrib['url']) + bootstrap_node = doc.find(_add_ns('bootstrapInfo')) + boot_info, bootstrap_url = self._parse_bootstrap_node( + bootstrap_node, man_base_url) + live = boot_info['live'] + metadata_node = media.find(_add_ns('metadata')) + if metadata_node is not None: + metadata = base64.b64decode(metadata_node.text) + else: + metadata = None + + fragments_list = build_fragments_list(boot_info) + test = self.params.get('test', False) + if test: + # We only download the first fragment + fragments_list = fragments_list[:1] + total_frags = len(fragments_list) + # For some akamai manifests we'll need to add a query to the fragment url + akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) + + ctx = { + 'filename': filename, + 'total_frags': total_frags, + 'live': bool(live), + } + + self._prepare_frag_download(ctx) + + dest_stream = ctx['dest_stream'] + + if ctx['complete_frags_downloaded_bytes'] == 0: + write_flv_header(dest_stream) + if not live: + write_metadata_tag(dest_stream, metadata) + + base_url_parsed = urllib.parse.urlparse(base_url) + + self._start_frag_download(ctx, info_dict) + + frag_index = 0 + while fragments_list: + seg_i, frag_i = fragments_list.pop(0) + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + name = 'Seg%d-Frag%d' % (seg_i, frag_i) + query = [] + if base_url_parsed.query: + query.append(base_url_parsed.query) + if akamai_pv: + query.append(akamai_pv.strip(';')) + if info_dict.get('extra_param_to_segment_url'): + query.append(info_dict['extra_param_to_segment_url']) + url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) + try: + success = self._download_fragment(ctx, url_parsed.geturl(), info_dict) + if not success: + return False + down_data = self._read_fragment(ctx) + reader = FlvReader(down_data) + while True: + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and 
thus
+                            # FlvReader may not be able to parse the whole
+                            # chunk. If so, write the segment as is
+                            # See https://github.com/ytdl-org/youtube-dl/issues/9214
+                            dest_stream.write(down_data)
+                            break
+                        raise
+                    if box_type == b'mdat':
+                        self._append_fragment(ctx, box_data)
+                        break
+            except HTTPError as err:
+                if live and (err.status == 404 or err.status == 410):
+                    # We didn't keep up with the live window. Continue
+                    # with the next available fragment.
+                    msg = 'Fragment %d unavailable' % frag_i
+                    self.report_warning(msg)
+                    fragments_list = []
+                else:
+                    raise
+
+            if not fragments_list and not test and live and bootstrap_url:
+                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+                total_frags += len(fragments_list)
+                if fragments_list and (fragments_list[0][1] > frag_i + 1):
+                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+                    self.report_warning(msg)
+
+        return self._finish_frag_download(ctx, info_dict)
diff --git a/yt_dlp/downloader/fc2.py b/yt_dlp/downloader/fc2.py
new file mode 100644
index 0000000..f9763de
--- /dev/null
+++ b/yt_dlp/downloader/fc2.py
@@ -0,0 +1,46 @@
+import threading
+
+from .common import FileDownloader
+from .external import FFmpegFD
+
+
+class FC2LiveFD(FileDownloader):
+    """
+    Downloads FC2 live without being stopped.
+ Note, this is not a part of public API, and will be removed without notice. + DO NOT USE + """ + + def real_download(self, filename, info_dict): + ws = info_dict['ws'] + + heartbeat_lock = threading.Lock() + heartbeat_state = [None, 1] + + def heartbeat(): + if heartbeat_state[1] < 0: + return + + try: + heartbeat_state[1] += 1 + ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) + except Exception: + self.to_screen('[fc2:live] Heartbeat failed') + + with heartbeat_lock: + heartbeat_state[0] = threading.Timer(30, heartbeat) + heartbeat_state[0]._daemonic = True + heartbeat_state[0].start() + + heartbeat() + + new_info_dict = info_dict.copy() + new_info_dict.update({ + 'ws': None, + 'protocol': 'live_ffmpeg', + }) + try: + return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) + finally: + # stop heartbeating + heartbeat_state[1] = -1 diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py new file mode 100644 index 0000000..b4f003d --- /dev/null +++ b/yt_dlp/downloader/fragment.py @@ -0,0 +1,527 @@ +import concurrent.futures +import contextlib +import json +import math +import os +import struct +import time + +from .common import FileDownloader +from .http import HttpFD +from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 +from ..compat import compat_os_name +from ..networking import Request +from ..networking.exceptions import HTTPError, IncompleteRead +from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj +from ..utils.networking import HTTPHeaderDict +from ..utils.progress import ProgressCalculator + + +class HttpQuietDownloader(HttpFD): + def to_screen(self, *args, **kargs): + pass + + to_console_title = to_screen + + +class FragmentFD(FileDownloader): + """ + A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). + + Available options: + + fragment_retries: Number of times to retry a fragment for HTTP error + (DASH and hlsnative only). Default is 0 for API, but 10 for CLI + skip_unavailable_fragments: + Skip unavailable fragments (DASH and hlsnative only) + keep_fragments: Keep downloaded fragments on disk after downloading is + finished + concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads + _no_ytdl_file: Don't use .ytdl file + + For each incomplete fragment download yt-dlp keeps on disk a special + bookkeeping file with download state and metadata (in future such files will + be used for any incomplete download handled by yt-dlp). This file is + used to properly handle resuming, check download file consistency and detect + potential errors. The file has a .ytdl extension and represents a standard + JSON file of the following format: + + extractor: + Dictionary of extractor related data. TBD. + + downloader: + Dictionary of downloader related data. May contain following data: + current_fragment: + Dictionary with current (being downloaded) fragment data: + index: 0-based index of current fragment among all fragments + fragment_count: + Total count of fragments + + This feature is experimental and file format may change in future. + """ + + def report_retry_fragment(self, err, frag_index, count, retries): + self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. 
' + 'Use yt_dlp.downloader.FileDownloader.report_retry instead') + return self.report_retry(err, count, retries, frag_index) + + def report_skip_fragment(self, frag_index, err=None): + err = f' {err};' if err else '' + self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...') + + def _prepare_url(self, info_dict, url): + headers = info_dict.get('http_headers') + return Request(url, None, headers) if headers else url + + def _prepare_and_start_frag_download(self, ctx, info_dict): + self._prepare_frag_download(ctx) + self._start_frag_download(ctx, info_dict) + + def __do_ytdl_file(self, ctx): + return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file') + + def _read_ytdl_file(self, ctx): + assert 'ytdl_corrupt' not in ctx + stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r') + try: + ytdl_data = json.loads(stream.read()) + ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index'] + if 'extra_state' in ytdl_data['downloader']: + ctx['extra_state'] = ytdl_data['downloader']['extra_state'] + except Exception: + ctx['ytdl_corrupt'] = True + finally: + stream.close() + + def _write_ytdl_file(self, ctx): + frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w') + try: + downloader = { + 'current_fragment': { + 'index': ctx['fragment_index'], + }, + } + if 'extra_state' in ctx: + downloader['extra_state'] = ctx['extra_state'] + if ctx.get('fragment_count') is not None: + downloader['fragment_count'] = ctx['fragment_count'] + frag_index_stream.write(json.dumps({'downloader': downloader})) + finally: + frag_index_stream.close() + + def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None): + fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) + fragment_info_dict = { + 'url': frag_url, + 'http_headers': headers or info_dict.get('http_headers'), + 'request_data': request_data, + 'ctx_id': ctx.get('ctx_id'), + } + frag_resume_len = 0 + if ctx['dl'].params.get('continuedl', True): + frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename)) + fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len + + success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict) + if not success: + return False + if fragment_info_dict.get('filetime'): + ctx['fragment_filetime'] = fragment_info_dict.get('filetime') + ctx['fragment_filename_sanitized'] = fragment_filename + return True + + def _read_fragment(self, ctx): + if not ctx.get('fragment_filename_sanitized'): + return None + try: + down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + except FileNotFoundError: + if ctx.get('live'): + return None + raise + ctx['fragment_filename_sanitized'] = frag_sanitized + frag_content = down.read() + down.close() + return frag_content + + def _append_fragment(self, ctx, frag_content): + try: + ctx['dest_stream'].write(frag_content) + ctx['dest_stream'].flush() + finally: + if self.__do_ytdl_file(ctx): + self._write_ytdl_file(ctx) + if not self.params.get('keep_fragments', False): + self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) + del ctx['fragment_filename_sanitized'] + + def _prepare_frag_download(self, ctx): + if not ctx.setdefault('live', False): + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' + 
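+        # Ad fragments are reported separately since they are excluded from the
+        # download rather than fetched.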
self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') + self.report_destination(ctx['filename']) + dl = HttpQuietDownloader(self.ydl, { + **self.params, + 'noprogress': True, + 'test': False, + 'sleep_interval': 0, + 'max_sleep_interval': 0, + 'sleep_interval_subtitles': 0, + }) + tmpfilename = self.temp_name(ctx['filename']) + open_mode = 'wb' + + # Establish possible resume length + resume_len = self.filesize_or_none(tmpfilename) + if resume_len > 0: + open_mode = 'ab' + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, + }) + + if self.__do_ytdl_file(ctx): + ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) + continuedl = self.params.get('continuedl', True) + if continuedl and ytdl_file_exists: + self._read_ytdl_file(ctx) + is_corrupt = ctx.get('ytdl_corrupt') is True + is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 + if is_corrupt or is_inconsistent: + message = ( + '.ytdl file is corrupt' if is_corrupt else + 'Inconsistent state of incomplete fragment download') + self.report_warning( + '%s. Restarting from the beginning ...' % message) + ctx['fragment_index'] = resume_len = 0 + if 'ytdl_corrupt' in ctx: + del ctx['ytdl_corrupt'] + self._write_ytdl_file(ctx) + + else: + if not continuedl: + if ytdl_file_exists: + self._read_ytdl_file(ctx) + ctx['fragment_index'] = resume_len = 0 + self._write_ytdl_file(ctx) + assert ctx['fragment_index'] == 0 + + dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode) + + ctx.update({ + 'dl': dl, + 'dest_stream': dest_stream, + 'tmpfilename': tmpfilename, + # Total complete fragments downloaded so far in bytes + 'complete_frags_downloaded_bytes': resume_len, + }) + + def _start_frag_download(self, ctx, info_dict): + resume_len = ctx['complete_frags_downloaded_bytes'] + total_frags = ctx['total_frags'] + ctx_id = ctx.get('ctx_id') + # Stores the download progress, updated by the progress hook + state = { + 'status': 'downloading', + 'downloaded_bytes': resume_len, + 'fragment_index': ctx['fragment_index'], + 'fragment_count': total_frags, + 'filename': ctx['filename'], + 'tmpfilename': ctx['tmpfilename'], + } + + ctx['started'] = time.time() + progress = ProgressCalculator(resume_len) + + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + if not total_frags and ctx.get('fragment_count'): + state['fragment_count'] = ctx['fragment_count'] + + if ctx_id is not None and s.get('ctx_id') != ctx_id: + return + + state['max_progress'] = ctx.get('max_progress') + state['progress_idx'] = ctx.get('progress_idx') + + state['elapsed'] = progress.elapsed + frag_total_bytes = s.get('total_bytes') or 0 + s['fragment_info_dict'] = s.pop('info_dict', {}) + + # XXX: Fragment resume is not accounted for here + if not ctx['live']: + estimated_size = ( + (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) + / (state['fragment_index'] + 1) * total_frags) + progress.total = estimated_size + progress.update(s.get('downloaded_bytes')) + state['total_bytes_estimate'] = progress.total + else: + progress.update(s.get('downloaded_bytes')) + + if s['status'] == 'finished': + state['fragment_index'] += 1 + ctx['fragment_index'] = state['fragment_index'] + progress.thread_reset() + + state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded + state['speed'] = ctx['speed'] = progress.speed.smooth + state['eta'] = progress.eta.smooth + + 
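+            # speed and eta come from ProgressCalculator's smoothed estimators,
+            # which dampens jitter from individual fragment downloads.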
self._hook_progress(state, info_dict) + + ctx['dl'].add_progress_hook(frag_progress_hook) + + return ctx['started'] + + def _finish_frag_download(self, ctx, info_dict): + ctx['dest_stream'].close() + if self.__do_ytdl_file(ctx): + self.try_remove(self.ytdl_filename(ctx['filename'])) + elapsed = time.time() - ctx['started'] + + to_file = ctx['tmpfilename'] != '-' + if to_file: + downloaded_bytes = self.filesize_or_none(ctx['tmpfilename']) + else: + downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + + if not downloaded_bytes: + if to_file: + self.try_remove(ctx['tmpfilename']) + self.report_error('The downloaded file is empty') + return False + elif to_file: + self.try_rename(ctx['tmpfilename'], ctx['filename']) + filetime = ctx.get('fragment_filetime') + if self.params.get('updatetime', True) and filetime: + with contextlib.suppress(Exception): + os.utime(ctx['filename'], (time.time(), filetime)) + + self._hook_progress({ + 'downloaded_bytes': downloaded_bytes, + 'total_bytes': downloaded_bytes, + 'filename': ctx['filename'], + 'status': 'finished', + 'elapsed': elapsed, + 'ctx_id': ctx.get('ctx_id'), + 'max_progress': ctx.get('max_progress'), + 'progress_idx': ctx.get('progress_idx'), + }, info_dict) + return True + + def _prepare_external_frag_download(self, ctx): + if 'live' not in ctx: + ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' + self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') + + tmpfilename = self.temp_name(ctx['filename']) + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, + }) + + def decrypter(self, info_dict): + _key_cache = {} + + def _get_key(url): + if url not in _key_cache: + _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read() + return _key_cache[url] + + def decrypt_fragment(fragment, frag_content): + if frag_content is None: + return + decrypt_info = fragment.get('decrypt_info') + if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': + return frag_content + iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence']) + decrypt_info['KEY'] = (decrypt_info.get('KEY') + or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI'])) + # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block + # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, + # not what it decrypts to. + if self.params.get('test', False): + return frag_content + return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)) + + return decrypt_fragment + + def download_and_append_fragments_multiple(self, *args, **kwargs): + ''' + @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... 
+ all args must be either tuple or list + ''' + interrupt_trigger = [True] + max_progress = len(args) + if max_progress == 1: + return self.download_and_append_fragments(*args[0], **kwargs) + max_workers = self.params.get('concurrent_fragment_downloads', 1) + if max_progress > 1: + self._prepare_multiline_status(max_progress) + is_live = any(traverse_obj(args, (..., 2, 'is_live'))) + + def thread_func(idx, ctx, fragments, info_dict, tpe): + ctx['max_progress'] = max_progress + ctx['progress_idx'] = idx + return self.download_and_append_fragments( + ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger) + + class FTPE(concurrent.futures.ThreadPoolExecutor): + # has to stop this or it's going to wait on the worker thread itself + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + if compat_os_name == 'nt': + def future_result(future): + while True: + try: + return future.result(0.1) + except KeyboardInterrupt: + raise + except concurrent.futures.TimeoutError: + continue + else: + def future_result(future): + return future.result() + + def interrupt_trigger_iter(fg): + for f in fg: + if not interrupt_trigger[0]: + break + yield f + + spins = [] + for idx, (ctx, fragments, info_dict) in enumerate(args): + tpe = FTPE(math.ceil(max_workers / max_progress)) + job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe) + spins.append((tpe, job)) + + result = True + for tpe, job in spins: + try: + result = result and future_result(job) + except KeyboardInterrupt: + interrupt_trigger[0] = False + finally: + tpe.shutdown(wait=True) + if not interrupt_trigger[0] and not is_live: + raise KeyboardInterrupt() + # we expect the user wants to stop and DO WANT the preceding postprocessors to run; + # so returning a intermediate result here instead of KeyboardInterrupt on live + return result + + def download_and_append_fragments( + self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False), + pack_func=(lambda content, idx: content), finish_func=None, + tpe=None, interrupt_trigger=(True, )): + + if not self.params.get('skip_unavailable_fragments', True): + is_fatal = lambda _: True + + def download_fragment(fragment, ctx): + if not interrupt_trigger[0]: + return + + frag_index = ctx['fragment_index'] = fragment['frag_index'] + ctx['last_error'] = None + headers = HTTPHeaderDict(info_dict.get('http_headers')) + byte_range = fragment.get('byte_range') + if byte_range: + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) + + # Never skip the first fragment + fatal = is_fatal(fragment.get('index') or (frag_index - 1)) + + def error_callback(err, count, retries): + if fatal and count > retries: + ctx['dest_stream'].close() + self.report_retry(err, count, retries, frag_index, fatal) + ctx['last_error'] = err + + for retry in RetryManager(self.params.get('fragment_retries'), error_callback): + try: + ctx['fragment_count'] = fragment.get('fragment_count') + if not self._download_fragment( + ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): + return + except (HTTPError, IncompleteRead) as err: + retry.error = err + continue + except DownloadError: # has own retry settings + if fatal: + raise + + def append_fragment(frag_content, frag_index, ctx): + if frag_content: + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + elif not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index, 'fragment not found') + else: + ctx['dest_stream'].close() + self.report_error(f'fragment 
{frag_index} not found, unable to continue') + return False + return True + + decrypt_fragment = self.decrypter(info_dict) + + max_workers = math.ceil( + self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) + if max_workers > 1: + def _download_fragment(fragment): + ctx_copy = ctx.copy() + download_fragment(fragment, ctx_copy) + return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') + + with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: + try: + for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): + ctx.update({ + 'fragment_filename_sanitized': frag_filename, + 'fragment_index': frag_index, + }) + if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx): + return False + except KeyboardInterrupt: + self._finish_multiline_status() + self.report_error( + 'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False) + pool.shutdown(wait=False) + raise + else: + for fragment in fragments: + if not interrupt_trigger[0]: + break + try: + download_fragment(fragment, ctx) + result = append_fragment( + decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + except KeyboardInterrupt: + if info_dict.get('is_live'): + break + raise + if not result: + return False + + if finish_func is not None: + ctx['dest_stream'].write(finish_func()) + ctx['dest_stream'].flush() + return self._finish_frag_download(ctx, info_dict) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py new file mode 100644 index 0000000..4ac5d99 --- /dev/null +++ b/yt_dlp/downloader/hls.py @@ -0,0 +1,378 @@ +import binascii +import io +import re +import urllib.parse + +from . import get_suitable_downloader +from .external import FFmpegFD +from .fragment import FragmentFD +from .. import webvtt +from ..dependencies import Cryptodome +from ..utils import ( + bug_reports_message, + parse_m3u8_attributes, + remove_start, + traverse_obj, + update_url_query, + urljoin, +) + + +class HlsFD(FragmentFD): + """ + Download segments in a m3u8 manifest. External downloaders can take over + the fragment downloads by supporting the 'm3u8_frag_urls' protocol and + re-defining 'supports_manifest' function + """ + + FD_NAME = 'hlsnative' + + @staticmethod + def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039 + return bool(re.search('|'.join(( + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay + r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + )), manifest)) + + @classmethod + def can_download(cls, manifest, info_dict, allow_unplayable_formats=False): + UNSUPPORTED_FEATURES = [ + # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + + # Live streams heuristic does not always work (e.g. geo restricted to Germany + # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) + # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. 
+ # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # # event media playlists [4] + # r'#EXT-X-MAP:', # media initialization [5] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 + # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 + # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 + # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 + ] + if not allow_unplayable_formats: + UNSUPPORTED_FEATURES += [ + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM + ] + + def check_results(): + yield not info_dict.get('is_live') + for feature in UNSUPPORTED_FEATURES: + yield not re.search(feature, manifest) + if not allow_unplayable_formats: + yield not cls._has_drm(manifest) + return all(check_results()) + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.url + s = urlh.read().decode('utf-8', 'ignore') + + can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None + if can_download: + has_ffmpeg = FFmpegFD.available() + no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s + if no_crypto and has_ffmpeg: + can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' + elif no_crypto: + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' + 'Decryption will be performed natively, but will be extremely slow') + elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s): + install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and ' + message = ('Live HLS streams are not supported by the native downloader. 
If this is a livestream, ' + f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') + if not can_download: + if self._has_drm(s) and not self.params.get('allow_unplayable_formats'): + if info_dict.get('has_drm') and self.params.get('test'): + self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True) + else: + self.report_error( + 'This format is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format', tb=False) + return False + message = message or 'Unsupported features have been detected' + fd = FFmpegFD(self.ydl, self.params) + self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') + return fd.real_download(filename, info_dict) + elif message: + self.report_warning(message) + + is_webvtt = info_dict['ext'] == 'vtt' + if is_webvtt: + real_downloader = None # Packing the fragments is not currently supported for external downloader + else: + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-')) + if real_downloader and not real_downloader.supports_manifest(s): + real_downloader = None + if real_downloader: + self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') + + def is_ad_fragment_start(s): + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s + or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + + def is_ad_fragment_end(s): + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s + or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + + fragments = [] + + media_frags = 0 + ad_frags = 0 + ad_frag_next = False + for line in s.splitlines(): + line = line.strip() + if not line: + continue + if line.startswith('#'): + if is_ad_fragment_start(line): + ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False + continue + if ad_frag_next: + ad_frags += 1 + continue + media_frags += 1 + + ctx = { + 'filename': filename, + 'total_frags': media_frags, + 'ad_frags': ad_frags, + } + + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, info_dict) + + extra_state = ctx.setdefault('extra_state', {}) + + format_index = info_dict.get('format_index') + extra_query = None + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + i = 0 + media_sequence = 0 + decrypt_info = {'METHOD': 'NONE'} + external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key')) + if external_aes_key: + external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x')) + assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key' + external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv')) + if external_aes_iv: + external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) + byte_range = {} + discontinuity_count = 0 + frag_index = 0 + ad_frag_next = False + for line in s.splitlines(): + line = line.strip() + if line: + if not line.startswith('#'): + if format_index and discontinuity_count != format_index: + continue + if ad_frag_next: + continue + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + frag_url = urljoin(man_url, line) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) + + 
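+                    # media_sequence is recorded because it doubles as the default
+                    # AES-128 IV whenever the EXT-X-KEY line carries no explicit IV
+                    # (see FragmentFD.decrypter).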
fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence, + }) + media_sequence += 1 + + elif line.startswith('#EXT-X-MAP'): + if format_index and discontinuity_count != format_index: + continue + if frag_index > 0: + self.report_error( + 'Initialization fragment found after media fragments, unable to download') + return False + frag_index += 1 + map_info = parse_m3u8_attributes(line[11:]) + frag_url = urljoin(man_url, map_info.get('URI')) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) + + if map_info.get('BYTERANGE'): + splitted_byte_range = map_info.get('BYTERANGE').split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + + fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence + }) + media_sequence += 1 + + elif line.startswith('#EXT-X-KEY'): + decrypt_url = decrypt_info.get('URI') + decrypt_info = parse_m3u8_attributes(line[11:]) + if decrypt_info['METHOD'] == 'AES-128': + if external_aes_iv: + decrypt_info['IV'] = external_aes_iv + elif 'IV' in decrypt_info: + decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) + if external_aes_key: + decrypt_info['KEY'] = external_aes_key + else: + decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if decrypt_url != decrypt_info['URI']: + decrypt_info['KEY'] = None + + elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): + media_sequence = int(line[22:]) + elif line.startswith('#EXT-X-BYTERANGE'): + splitted_byte_range = line[17:].split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + elif is_ad_fragment_start(line): + ad_frag_next = True + elif is_ad_fragment_end(line): + ad_frag_next = False + elif line.startswith('#EXT-X-DISCONTINUITY'): + discontinuity_count += 1 + i += 1 + + # We only download the first fragment during the test + if self.params.get('test', False): + fragments = [fragments[0] if fragments else None] + + if real_downloader: + info_dict['fragments'] = fragments + fd = real_downloader(self.ydl, self.params) + # TODO: Make progress updates work without hooking twice + # for ph in self._progress_hooks: + # fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) + + if is_webvtt: + def pack_fragment(frag_content, frag_index): + output = io.StringIO() + adjust = 0 + overflow = False + mpegts_last = None + for block in webvtt.parse_fragment(frag_content): + if isinstance(block, webvtt.CueBlock): + extra_state['webvtt_mpegts_last'] = mpegts_last + if overflow: + extra_state['webvtt_mpegts_adjust'] += 1 + overflow = False + block.start += adjust + block.end += adjust + + dedup_window = extra_state.setdefault('webvtt_dedup_window', []) + + ready = [] + + i = 0 + is_new = True + while i < len(dedup_window): + wcue = dedup_window[i] + wblock = webvtt.CueBlock.from_json(wcue) + i += 1 + if wblock.hinges(block): + wcue['end'] = block.end + is_new = False + continue + if wblock == block: + is_new = False + continue + if wblock.end > block.start: + continue 
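+                            # wblock ends at or before the start of the new cue, so it
+                            # can never merge or deduplicate with anything newer; emit
+                            # it and remove it from the dedup window.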
+ ready.append(wblock) + i -= 1 + del dedup_window[i] + + if is_new: + dedup_window.append(block.as_json) + for block in ready: + block.write_into(output) + + # we only emit cues once they fall out of the duplicate window + continue + elif isinstance(block, webvtt.Magic): + # take care of MPEG PES timestamp overflow + if block.mpegts is None: + block.mpegts = 0 + extra_state.setdefault('webvtt_mpegts_adjust', 0) + block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33 + if block.mpegts < extra_state.get('webvtt_mpegts_last', 0): + overflow = True + block.mpegts += 1 << 33 + mpegts_last = block.mpegts + + if frag_index == 1: + extra_state['webvtt_mpegts'] = block.mpegts or 0 + extra_state['webvtt_local'] = block.local or 0 + # XXX: block.local = block.mpegts = None ? + else: + if block.mpegts is not None and block.local is not None: + adjust = ( + (block.mpegts - extra_state.get('webvtt_mpegts', 0)) + - (block.local - extra_state.get('webvtt_local', 0)) + ) + continue + elif isinstance(block, webvtt.HeaderBlock): + if frag_index != 1: + # XXX: this should probably be silent as well + # or verify that all segments contain the same data + self.report_warning(bug_reports_message( + 'Discarding a %s block found in the middle of the stream; ' + 'if the subtitles display incorrectly,' + % (type(block).__name__))) + continue + block.write_into(output) + + return output.getvalue().encode() + + def fin_fragments(): + dedup_window = extra_state.get('webvtt_dedup_window') + if not dedup_window: + return b'' + + output = io.StringIO() + for cue in dedup_window: + webvtt.CueBlock.from_json(cue).write_into(output) + + return output.getvalue().encode() + + if len(fragments) == 1: + self.download_and_append_fragments(ctx, fragments, info_dict) + else: + self.download_and_append_fragments( + ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) + else: + return self.download_and_append_fragments(ctx, fragments, info_dict) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py new file mode 100644 index 0000000..693828b --- /dev/null +++ b/yt_dlp/downloader/http.py @@ -0,0 +1,383 @@ +import os +import random +import time + +from .common import FileDownloader +from ..networking import Request +from ..networking.exceptions import ( + CertificateVerifyError, + HTTPError, + TransportError, +) +from ..utils import ( + ContentTooShortError, + RetryManager, + ThrottledDownload, + XAttrMetadataError, + XAttrUnavailableError, + encodeFilename, + int_or_none, + parse_http_range, + try_call, + write_xattr, +) +from ..utils.networking import HTTPHeaderDict + + +class HttpFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + request_data = info_dict.get('request_data', None) + + class DownloadContext(dict): + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + ctx = DownloadContext() + ctx.filename = filename + ctx.tmpfilename = self.temp_name(filename) + ctx.stream = None + + # Disable compression + headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers')) + + is_test = self.params.get('test', False) + chunk_size = self._TEST_FILE_SIZE if is_test else ( + self.params.get('http_chunk_size') + or info_dict.get('downloader_options', {}).get('http_chunk_size') + or 0) + + ctx.open_mode = 'wb' + ctx.resume_len = 0 + ctx.block_size = self.params.get('buffersize', 1024) + ctx.start_time = time.time() + + # parse given Range + req_start, req_end, _ = 
parse_http_range(headers.get('Range')) + + if self.params.get('continuedl', True): + # Establish possible resume length + if os.path.isfile(encodeFilename(ctx.tmpfilename)): + ctx.resume_len = os.path.getsize( + encodeFilename(ctx.tmpfilename)) + + ctx.is_resume = ctx.resume_len > 0 + + class SucceedDownload(Exception): + pass + + class RetryDownload(Exception): + def __init__(self, source_error): + self.source_error = source_error + + class NextFragment(Exception): + pass + + def establish_connection(): + ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) + if not is_test and chunk_size else chunk_size) + if ctx.resume_len > 0: + range_start = ctx.resume_len + if req_start is not None: + # offset the beginning of Range to be within request + range_start += req_start + if ctx.is_resume: + self.report_resuming_byte(ctx.resume_len) + ctx.open_mode = 'ab' + elif req_start is not None: + range_start = req_start + elif ctx.chunk_size > 0: + range_start = 0 + else: + range_start = None + ctx.is_resume = False + + if ctx.chunk_size: + chunk_aware_end = range_start + ctx.chunk_size - 1 + # we're not allowed to download outside Range + range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end) + elif req_end is not None: + # there's no need for chunked downloads, so download until the end of Range + range_end = req_end + else: + range_end = None + + if try_call(lambda: range_start > range_end): + ctx.resume_len = 0 + ctx.open_mode = 'wb' + raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})')) + + if try_call(lambda: range_end >= ctx.content_len): + range_end = ctx.content_len - 1 + + request = Request(url, request_data, headers) + has_range = range_start is not None + if has_range: + request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' + # Establish connection + try: + ctx.data = self.ydl.urlopen(request) + # When trying to resume, Content-Range HTTP header of response has to be checked + # to match the value of requested Range HTTP header. This is due to a webservers + # that don't support resuming and serve a whole file with no Content-Range + # set in response despite of requested Range (see + # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) + if has_range: + content_range = ctx.data.headers.get('Content-Range') + content_range_start, content_range_end, content_len = parse_http_range(content_range) + # Content-Range is present and matches requested Range, resume is possible + if range_start == content_range_start and ( + # Non-chunked download + not ctx.chunk_size + # Chunked download and requested piece or + # its part is promised to be served + or content_range_end == range_end + or content_len < range_end): + ctx.content_len = content_len + if content_len or req_end: + ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0) + return + # Content-Range is either not present or invalid. 
Assuming remote webserver is + # trying to send the whole file, resume is not possible, so wiping the local file + # and performing entire redownload + elif range_start > 0: + self.report_unable_to_resume() + ctx.resume_len = 0 + ctx.open_mode = 'wb' + ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None)) + except HTTPError as err: + if err.status == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + ctx.data = self.ydl.urlopen( + Request(url, request_data, headers)) + content_length = ctx.data.headers['Content-Length'] + except HTTPError as err: + if err.status < 500 or err.status >= 600: + raise + else: + # Examine the reported length + if (content_length is not None + and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. + self.report_file_already_downloaded(ctx.filename) + self.try_rename(ctx.tmpfilename, ctx.filename) + self._hook_progress({ + 'filename': ctx.filename, + 'status': 'finished', + 'downloaded_bytes': ctx.resume_len, + 'total_bytes': ctx.resume_len, + }, info_dict) + raise SucceedDownload() + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + ctx.resume_len = 0 + ctx.open_mode = 'wb' + return + elif err.status < 500 or err.status >= 600: + # Unexpected HTTP error + raise + raise RetryDownload(err) + except CertificateVerifyError: + raise + except TransportError as err: + raise RetryDownload(err) + + def close_stream(): + if ctx.stream is not None: + if not ctx.tmpfilename == '-': + ctx.stream.close() + ctx.stream = None + + def download(): + data_len = ctx.data.headers.get('Content-length') + + if ctx.data.headers.get('Content-encoding'): + # Content-encoding is present, Content-length is not reliable anymore as we are + # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176) + data_len = None + + # Range HTTP header may be ignored/unsupported by a webserver + # (e.g. extractor/scivee.py, extractor/bambuser.py). + # However, for a test we still would like to download just a piece of a file. + # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control + # block size when downloading a file. + if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): + data_len = self._TEST_FILE_SIZE + + if data_len is not None: + data_len = int(data_len) + ctx.resume_len + min_data_len = self.params.get('min_filesize') + max_data_len = self.params.get('max_filesize') + if min_data_len is not None and data_len < min_data_len: + self.to_screen( + f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.') + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen( + f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). 
Aborting.') + return False + + byte_counter = 0 + ctx.resume_len + block_size = ctx.block_size + start = time.time() + + # measure time over whole while-loop, so slow_down() and best_block_size() work together properly + now = None # needed for slow_down() in the first loop run + before = start # start measuring + + def retry(e): + close_stream() + if ctx.tmpfilename == '-': + ctx.resume_len = byte_counter + else: + try: + ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) + except FileNotFoundError: + ctx.resume_len = 0 + raise RetryDownload(e) + + while True: + try: + # Download and write + data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) + except TransportError as err: + retry(err) + + byte_counter += len(data_block) + + # exit loop when download is finished + if len(data_block) == 0: + break + + # Open destination file just in time + if ctx.stream is None: + try: + ctx.stream, ctx.tmpfilename = self.sanitize_open( + ctx.tmpfilename, ctx.open_mode) + assert ctx.stream is not None + ctx.filename = self.undo_temp_name(ctx.tmpfilename) + self.report_destination(ctx.filename) + except OSError as err: + self.report_error('unable to open for writing: %s' % str(err)) + return False + + if self.params.get('xattr_set_filesize', False) and data_len is not None: + try: + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) + except (XAttrUnavailableError, XAttrMetadataError) as err: + self.report_error('unable to set filesize xattr: %s' % str(err)) + + try: + ctx.stream.write(data_block) + except OSError as err: + self.to_stderr('\n') + self.report_error('unable to write data: %s' % str(err)) + return False + + # Apply rate limit + self.slow_down(start, now, byte_counter - ctx.resume_len) + + # end measuring of one loop run + now = time.time() + after = now + + # Adjust block size + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) + + before = after + + # Progress message + speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) + if ctx.data_len is None: + eta = None + else: + eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len) + + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': byte_counter, + 'total_bytes': ctx.data_len, + 'tmpfilename': ctx.tmpfilename, + 'filename': ctx.filename, + 'eta': eta, + 'speed': speed, + 'elapsed': now - ctx.start_time, + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) + + if data_len is not None and byte_counter == data_len: + break + + if speed and speed < (self.params.get('throttledratelimit') or 0): + # The speed must stay below the limit for 3 seconds + # This prevents raising error when the speed temporarily goes down + if ctx.throttle_start is None: + ctx.throttle_start = now + elif now - ctx.throttle_start > 3: + if ctx.stream is not None and ctx.tmpfilename != '-': + ctx.stream.close() + raise ThrottledDownload() + elif speed: + ctx.throttle_start = None + + if ctx.stream is None: + self.to_stderr('\n') + self.report_error('Did not get any data blocks') + return False + + if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: + ctx.resume_len = byte_counter + raise NextFragment() + + if ctx.tmpfilename != '-': + ctx.stream.close() + + if data_len is not None and byte_counter != data_len: + err = ContentTooShortError(byte_counter, int(data_len)) + retry(err) + + 
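+            # All data was received and, when Content-Length was known, the byte
+            # count matched; promote the temporary file to its final name.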
self.try_rename(ctx.tmpfilename, ctx.filename) + + # Update file modification time + if self.params.get('updatetime', True): + info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': ctx.filename, + 'status': 'finished', + 'elapsed': time.time() - ctx.start_time, + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) + + return True + + for retry in RetryManager(self.params.get('retries'), self.report_retry): + try: + establish_connection() + return download() + except RetryDownload as err: + retry.error = err.source_error + continue + except NextFragment: + retry.error = None + retry.attempt -= 1 + continue + except SucceedDownload: + return True + except: # noqa: E722 + close_stream() + raise + return False diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py new file mode 100644 index 0000000..dd688f5 --- /dev/null +++ b/yt_dlp/downloader/ism.py @@ -0,0 +1,283 @@ +import binascii +import io +import struct +import time + +from .fragment import FragmentFD +from ..networking.exceptions import HTTPError +from ..utils import RetryManager + +u8 = struct.Struct('>B') +u88 = struct.Struct('>Bx') +u16 = struct.Struct('>H') +u1616 = struct.Struct('>Hxx') +u32 = struct.Struct('>I') +u64 = struct.Struct('>Q') + +s88 = struct.Struct('>bx') +s16 = struct.Struct('>h') +s1616 = struct.Struct('>hxx') +s32 = struct.Struct('>i') + +unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) + +TRACK_ENABLED = 0x1 +TRACK_IN_MOVIE = 0x2 +TRACK_IN_PREVIEW = 0x4 + +SELF_CONTAINED = 0x1 + + +def box(box_type, payload): + return u32.pack(8 + len(payload)) + box_type + payload + + +def full_box(box_type, version, flags, payload): + return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) + + +def write_piff_header(stream, params): + track_id = params['track_id'] + fourcc = params['fourcc'] + duration = params['duration'] + timescale = params.get('timescale', 10000000) + language = params.get('language', 'und') + height = params.get('height', 0) + width = params.get('width', 0) + stream_type = params['stream_type'] + creation_time = modification_time = int(time.time()) + + ftyp_payload = b'isml' # major brand + ftyp_payload += u32.pack(1) # minor version + ftyp_payload += b'piff' + b'iso2' # compatible brands + stream.write(box(b'ftyp', ftyp_payload)) # File Type Box + + mvhd_payload = u64.pack(creation_time) + mvhd_payload += u64.pack(modification_time) + mvhd_payload += u32.pack(timescale) + mvhd_payload += u64.pack(duration) + mvhd_payload += s1616.pack(1) # rate + mvhd_payload += s88.pack(1) # volume + mvhd_payload += u16.pack(0) # reserved + mvhd_payload += u32.pack(0) * 2 # reserved + mvhd_payload += unity_matrix + mvhd_payload += u32.pack(0) * 6 # pre defined + mvhd_payload += u32.pack(0xffffffff) # next track id + moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box + + tkhd_payload = u64.pack(creation_time) + tkhd_payload += u64.pack(modification_time) + tkhd_payload += u32.pack(track_id) # track id + tkhd_payload += u32.pack(0) # reserved + tkhd_payload += u64.pack(duration) + tkhd_payload += u32.pack(0) * 2 # reserved + tkhd_payload += s16.pack(0) # layer + tkhd_payload += s16.pack(0) # alternate group + tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume + tkhd_payload += u16.pack(0) # reserved + tkhd_payload += unity_matrix + tkhd_payload += u1616.pack(width) + tkhd_payload 
+= u1616.pack(height) + trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box + + mdhd_payload = u64.pack(creation_time) + mdhd_payload += u64.pack(modification_time) + mdhd_payload += u32.pack(timescale) + mdhd_payload += u64.pack(duration) + mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60)) + mdhd_payload += u16.pack(0) # pre defined + mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box + + hdlr_payload = u32.pack(0) # pre defined + if stream_type == 'audio': # handler type + hdlr_payload += b'soun' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SoundHandler\0' # name + elif stream_type == 'video': + hdlr_payload += b'vide' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'VideoHandler\0' # name + elif stream_type == 'text': + hdlr_payload += b'subt' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SubtitleHandler\0' # name + else: + assert False + mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box + + if stream_type == 'audio': + smhd_payload = s88.pack(0) # balance + smhd_payload += u16.pack(0) # reserved + media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header + elif stream_type == 'video': + vmhd_payload = u16.pack(0) # graphics mode + vmhd_payload += u16.pack(0) * 3 # opcolor + media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header + elif stream_type == 'text': + media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header + else: + assert False + minf_payload = media_header_box + + dref_payload = u32.pack(1) # entry count + dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box + dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box + minf_payload += box(b'dinf', dinf_payload) # Data Information Box + + stsd_payload = u32.pack(1) # entry count + + sample_entry_payload = u8.pack(0) * 6 # reserved + sample_entry_payload += u16.pack(1) # data reference index + if stream_type == 'audio': + sample_entry_payload += u32.pack(0) * 2 # reserved + sample_entry_payload += u16.pack(params.get('channels', 2)) + sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u1616.pack(params['sampling_rate']) + + if fourcc == 'AACL': + sample_entry_box = box(b'mp4a', sample_entry_payload) + if fourcc == 'EC-3': + sample_entry_box = box(b'ec-3', sample_entry_payload) + elif stream_type == 'video': + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u32.pack(0) * 3 # pre defined + sample_entry_payload += u16.pack(width) + sample_entry_payload += u16.pack(height) + sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi + sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi + sample_entry_payload += u32.pack(0) # reserved + sample_entry_payload += u16.pack(1) # frame count + sample_entry_payload += u8.pack(0) * 32 # compressor name + sample_entry_payload += u16.pack(0x18) # depth + sample_entry_payload += s16.pack(-1) # pre defined + + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode()) + if fourcc in ('H264', 'AVC1'): + sps, pps = codec_private_data.split(u32.pack(1))[1:] + avcc_payload = u8.pack(1) # configuration 
version + avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication + avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one + avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) + avcc_payload += u16.pack(len(sps)) + avcc_payload += sps + avcc_payload += u8.pack(1) # number of pps + avcc_payload += u16.pack(len(pps)) + avcc_payload += pps + sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record + sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + else: + assert False + elif stream_type == 'text': + if fourcc == 'TTML': + sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace + sample_entry_payload += b'\0' # schema location + sample_entry_payload += b'\0' # auxilary mime types(??) + sample_entry_box = box(b'stpp', sample_entry_payload) + else: + assert False + else: + assert False + stsd_payload += sample_entry_box + + stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box + + stts_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box + + stsc_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box + + stco_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box + + minf_payload += box(b'stbl', stbl_payload) # Sample Table Box + + mdia_payload += box(b'minf', minf_payload) # Media Information Box + + trak_payload += box(b'mdia', mdia_payload) # Media Box + + moov_payload += box(b'trak', trak_payload) # Track Box + + mehd_payload = u64.pack(duration) + mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box + + trex_payload = u32.pack(track_id) # track id + trex_payload += u32.pack(1) # default sample description index + trex_payload += u32.pack(0) # default sample duration + trex_payload += u32.pack(0) # default sample size + trex_payload += u32.pack(0) # default sample flags + mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box + + moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box + stream.write(box(b'moov', moov_payload)) # Movie Box + + +def extract_box_data(data, box_sequence): + data_reader = io.BytesIO(data) + while True: + box_size = u32.unpack(data_reader.read(4))[0] + box_type = data_reader.read(4) + if box_type == box_sequence[0]: + box_data = data_reader.read(box_size - 8) + if len(box_sequence) == 1: + return box_data + return extract_box_data(box_data, box_sequence[1:]) + data_reader.seek(box_size - 8, 1) + + +class IsmFD(FragmentFD): + """ + Download segments in a ISM manifest + """ + + def real_download(self, filename, info_dict): + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + + ctx = { + 'filename': filename, + 'total_frags': len(segments), + } + + self._prepare_and_start_frag_download(ctx, info_dict) + + extra_state = ctx.setdefault('extra_state', { + 'ism_track_written': False, + }) + + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + frag_index = 0 + for i, segment in enumerate(segments): + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + + retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, + frag_index=frag_index, fatal=not 
skip_unavailable_fragments)
+            for retry in retry_manager:
+                try:
+                    success = self._download_fragment(ctx, segment['url'], info_dict)
+                    if not success:
+                        return False
+                    frag_content = self._read_fragment(ctx)
+
+                    if not extra_state['ism_track_written']:
+                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
+                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
+                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
+                        extra_state['ism_track_written'] = True
+                    self._append_fragment(ctx, frag_content)
+                except HTTPError as err:
+                    retry.error = err
+                    continue
+
+            if retry_manager.error:
+                if not skip_unavailable_fragments:
+                    return False
+                self.report_skip_fragment(frag_index)
+
+        return self._finish_frag_download(ctx, info_dict)
diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py
new file mode 100644
index 0000000..d977dce
--- /dev/null
+++ b/yt_dlp/downloader/mhtml.py
@@ -0,0 +1,189 @@
+import io
+import quopri
+import re
+import uuid
+
+from .fragment import FragmentFD
+from ..compat import imghdr
+from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
+from ..version import __version__ as YT_DLP_VERSION
+
+
+class MhtmlFD(FragmentFD):
+    _STYLESHEET = """\
+html, body {
+    margin: 0;
+    padding: 0;
+    height: 100vh;
+}
+
+html {
+    overflow-y: scroll;
+    scroll-snap-type: y mandatory;
+}
+
+body {
+    scroll-snap-type: y mandatory;
+    display: flex;
+    flex-flow: column;
+}
+
+body > figure {
+    max-width: 100vw;
+    max-height: 100vh;
+    scroll-snap-align: center;
+}
+
+body > figure > figcaption {
+    text-align: center;
+    height: 2.5em;
+}
+
+body > figure > img {
+    display: block;
+    margin: auto;
+    max-width: 100%;
+    max-height: calc(100vh - 5em);
+}
+"""
+    _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
+    _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
+
+    @staticmethod
+    def _escape_mime(s):
+        return '=?utf-8?Q?' + (b''.join(
+            bytes((b,)) if b >= 0x20 else b'=%02X' % b
+            for b in quopri.encodestring(s.encode(), header=True)
+        )).decode('us-ascii') + '?='
+
+    def _gen_cid(self, i, fragment, frag_boundary):
+        return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
+
+    def _gen_stub(self, *, fragments, frag_boundary, title):
+        output = io.StringIO()
+
+        output.write((
+            '<!DOCTYPE html>'
+            '<html>'
+            '<head>'
+            ''  '<meta name="generator" content="yt-dlp {version}">'
+            ''  '<title>{title}</title>'
+            ''  '<style>{styles}</style>'
+            '<body>'
+        ).format(
+            version=escapeHTML(YT_DLP_VERSION),
+            styles=self._STYLESHEET,
+            title=escapeHTML(title)
+        ))
+
+        t0 = 0
+        for i, frag in enumerate(fragments):
+            output.write('<figure>')
+            try:
+                t1 = t0 + frag['duration']
+                output.write((
+                    '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
+                ).format(
+                    num=i + 1,
+                    t0=srt_subtitles_timecode(t0),
+                    t1=srt_subtitles_timecode(t1),
+                    duration=formatSeconds(frag['duration'], msec=True)
+                ))
+            except (KeyError, ValueError, TypeError):
+                t1 = None
+                output.write((
+                    '<figcaption>Slide #{num}</figcaption>'
+                ).format(num=i + 1))
+            output.write('<img src="cid:{cid}">'.format(
+                cid=self._gen_cid(i, frag, frag_boundary)))
+            output.write('</figure>')
+            t0 = t1
+
+        return output.getvalue()
+
+    def real_download(self, filename, info_dict):
+        fragment_base_url = info_dict.get('fragment_base_url')
+        fragments = info_dict['fragments'][:1] if self.params.get(
+            'test', False) else info_dict['fragments']
+        title = info_dict.get('title', info_dict['format_id'])
+        origin = info_dict.get('webpage_url', info_dict['url'])
+
+        ctx = {
+            'filename': filename,
+            'total_frags': len(fragments),
+        }
+
+        self._prepare_and_start_frag_download(ctx, info_dict)
+
+        extra_state = ctx.setdefault('extra_state', {
+            'header_written': False,
+            'mime_boundary': str(uuid.uuid4()).replace('-', ''),
+        })
+
+        frag_boundary = extra_state['mime_boundary']
+
+        if not extra_state['header_written']:
+            stub = self._gen_stub(
+                fragments=fragments,
+                frag_boundary=frag_boundary,
+                title=title
+            )
+
+            ctx['dest_stream'].write((
+                'MIME-Version: 1.0\r\n'
+                'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
+                'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
+                'Subject: {title}\r\n'
+                'Content-type: multipart/related; '
+                ''  'boundary="{boundary}"; '
+                ''  'type="text/html"\r\n'
+                'X.yt-dlp.Origin: {origin}\r\n'
+                '\r\n'
+                '--{boundary}\r\n'
+                'Content-Type: text/html; charset=utf-8\r\n'
+                'Content-Length: {length}\r\n'
+                '\r\n'
+                '{stub}\r\n'
+            ).format(
+                origin=origin,
+                boundary=frag_boundary,
+                length=len(stub),
+                title=self._escape_mime(title),
+                stub=stub
+            ).encode())
+            extra_state['header_written'] = True
+
+        for i, fragment in enumerate(fragments):
+            if (i + 1) <= ctx['fragment_index']:
+                continue
+
+            fragment_url = fragment.get('url')
+            if not fragment_url:
+                assert fragment_base_url
+                fragment_url = urljoin(fragment_base_url, fragment['path'])
+
+            success = self._download_fragment(ctx, fragment_url, info_dict)
+            if not success:
+                continue
+            frag_content = self._read_fragment(ctx)
+
+            frag_header = io.BytesIO()
+            frag_header.write(
+                b'--%b\r\n' % frag_boundary.encode('us-ascii'))
+            frag_header.write(
+                b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
+            frag_header.write(
+                b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode())
+            frag_header.write(
+                b'Content-length: %u\r\n' % len(frag_content))
+            frag_header.write(
+                b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
+            frag_header.write(
+                b'X.yt-dlp.Duration: %f\r\n' % fragment['duration'])
+            frag_header.write(b'\r\n')
+            self._append_fragment(
+                ctx, frag_header.getvalue() + frag_content + b'\r\n')
+
+        ctx['dest_stream'].write(
+            b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
+        return self._finish_frag_download(ctx, info_dict)
diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py
new file mode 100644
index 0000000..fef8bff
--- /dev/null
+++ b/yt_dlp/downloader/niconico.py
@@ -0,0 +1,140 @@
+import json
+import threading
+import time
+
+from .
import get_suitable_downloader +from .common import FileDownloader +from .external import FFmpegFD +from ..networking import Request +from ..utils import DownloadError, str_or_none, try_get + + +class NiconicoDmcFD(FileDownloader): + """ Downloading niconico douga from DMC with heartbeat """ + + def real_download(self, filename, info_dict): + from ..extractor.niconico import NiconicoIE + + self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + ie = NiconicoIE(self.ydl) + info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) + + fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params) + + success = download_complete = False + timer = [None] + heartbeat_lock = threading.Lock() + heartbeat_url = heartbeat_info_dict['url'] + heartbeat_data = heartbeat_info_dict['data'].encode() + heartbeat_interval = heartbeat_info_dict.get('interval', 30) + + request = Request(heartbeat_url, heartbeat_data) + + def heartbeat(): + try: + self.ydl.urlopen(request).read() + except Exception: + self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + + with heartbeat_lock: + if not download_complete: + timer[0] = threading.Timer(heartbeat_interval, heartbeat) + timer[0].start() + + heartbeat_info_dict['ping']() + self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval)) + try: + heartbeat() + if type(fd).__name__ == 'HlsFD': + info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) + success = fd.real_download(filename, info_dict) + finally: + if heartbeat_lock: + with heartbeat_lock: + timer[0].cancel() + download_complete = True + return success + + +class NiconicoLiveFD(FileDownloader): + """ Downloads niconico live without being stopped """ + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + ws_url = info_dict['url'] + ws_extractor = info_dict['ws'] + ws_origin_host = info_dict['origin'] + live_quality = info_dict.get('live_quality', 'high') + live_latency = info_dict.get('live_latency', 'high') + dl = FFmpegFD(self.ydl, self.params or {}) + + new_info_dict = info_dict.copy() + new_info_dict.update({ + 'protocol': 'm3u8', + }) + + def communicate_ws(reconnect): + if reconnect: + ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'})) + if self.ydl.params.get('verbose', False): + self.to_screen('[debug] Sending startWatching request') + ws.send(json.dumps({ + 'type': 'startWatching', + 'data': { + 'stream': { + 'quality': live_quality, + 'protocol': 'hls+fmp4', + 'latency': live_latency, + 'chasePlay': False + }, + 'room': { + 'protocol': 'webSocket', + 'commentable': True + }, + 'reconnect': True, + } + })) + else: + ws = ws_extractor + with ws: + while True: + recv = ws.recv() + if not recv: + continue + data = json.loads(recv) + if not data or not isinstance(data, dict): + continue + if data.get('type') == 'ping': + # pong back + ws.send(r'{"type":"pong"}') + ws.send(r'{"type":"keepSeat"}') + elif data.get('type') == 'disconnect': + self.write_debug(data) + return True + elif data.get('type') == 'error': + self.write_debug(data) + message = try_get(data, lambda x: x['body']['code'], str) or recv + return DownloadError(message) + elif self.ydl.params.get('verbose', False): + if len(recv) > 100: + recv = recv[:100] + '...' 
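+                        # Log the (possibly truncated) server message only in
+                        # verbose mode, to avoid dumping full websocket payloads.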
+ self.to_screen('[debug] Server said: %s' % recv) + + def ws_main(): + reconnect = False + while True: + try: + ret = communicate_ws(reconnect) + if ret is True: + return + except BaseException as e: + self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) + time.sleep(10) + continue + finally: + reconnect = True + + thread = threading.Thread(target=ws_main, daemon=True) + thread.start() + + return dl.download(filename, new_info_dict) diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py new file mode 100644 index 0000000..0e09525 --- /dev/null +++ b/yt_dlp/downloader/rtmp.py @@ -0,0 +1,213 @@ +import os +import re +import subprocess +import time + +from .common import FileDownloader +from ..utils import ( + Popen, + check_executable, + encodeArgument, + encodeFilename, + get_exe_version, +) + + +def rtmpdump_version(): + return get_exe_version( + 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)') + + +class RtmpFD(FileDownloader): + def real_download(self, filename, info_dict): + def run_rtmpdump(args): + start = time.time() + resume_percent = None + resume_downloaded_data_len = None + proc = Popen(args, stderr=subprocess.PIPE) + cursor_in_new_line = True + proc_stderr_closed = False + try: + while not proc_stderr_closed: + # read line from stderr + line = '' + while True: + char = proc.stderr.read(1) + if not char: + proc_stderr_closed = True + break + if char in [b'\r', b'\n']: + break + line += char.decode('ascii', 'replace') + if not line: + # proc_stderr_closed is True + continue + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1)) * 1024) + percent = float(mobj.group(2)) + if not resume_percent: + resume_percent = percent + resume_downloaded_data_len = downloaded_data_len + time_now = time.time() + eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) + speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) + data_len = None + if percent > 0: + data_len = int(downloaded_data_len * 100 / percent) + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': downloaded_data_len, + 'total_bytes_estimate': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'eta': eta, + 'elapsed': time_now - start, + 'speed': speed, + }, info_dict) + cursor_in_new_line = False + else: + # no percent for live streams + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1)) * 1024) + time_now = time.time() + speed = self.calc_speed(start, time_now, downloaded_data_len) + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'elapsed': time_now - start, + 'speed': speed, + }, info_dict) + cursor_in_new_line = False + elif self.params.get('verbose', False): + if not cursor_in_new_line: + self.to_screen('') + cursor_in_new_line = True + self.to_screen('[rtmpdump] ' + line) + if not cursor_in_new_line: + self.to_screen('') + return proc.wait() + except BaseException: # Including KeyboardInterrupt + proc.kill(timeout=None) + raise + + url = info_dict['url'] + player_url = info_dict.get('player_url') + page_url = info_dict.get('page_url') + app = info_dict.get('app') + play_path = info_dict.get('play_path') + tc_url = info_dict.get('tc_url') + 
flash_version = info_dict.get('flash_version') + live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn') + protocol = info_dict.get('rtmp_protocol') + real_time = info_dict.get('rtmp_real_time', False) + no_resume = info_dict.get('no_resume', False) + continue_dl = self.params.get('continuedl', True) + + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + test = self.params.get('test', False) + + # Check for rtmpdump first + if not check_executable('rtmpdump', ['-h']): + self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install') + return False + + # Download using rtmpdump. rtmpdump returns exit code 2 when + # the connection was interrupted and resuming appears to be + # possible. This is part of rtmpdump's normal usage, AFAIK. + basic_args = [ + 'rtmpdump', '--verbose', '-r', url, + '-o', tmpfilename] + if player_url is not None: + basic_args += ['--swfVfy', player_url] + if page_url is not None: + basic_args += ['--pageUrl', page_url] + if app is not None: + basic_args += ['--app', app] + if play_path is not None: + basic_args += ['--playpath', play_path] + if tc_url is not None: + basic_args += ['--tcUrl', tc_url] + if test: + basic_args += ['--stop', '1'] + if flash_version is not None: + basic_args += ['--flashVer', flash_version] + if live: + basic_args += ['--live'] + if isinstance(conn, list): + for entry in conn: + basic_args += ['--conn', entry] + elif isinstance(conn, str): + basic_args += ['--conn', conn] + if protocol is not None: + basic_args += ['--protocol', protocol] + if real_time: + basic_args += ['--realtime'] + + args = basic_args + if not no_resume and continue_dl and not live: + args += ['--resume'] + if not live and continue_dl: + args += ['--skip', '1'] + + args = [encodeArgument(a) for a in args] + + self._debug_cmd(args, exe='rtmpdump') + + RD_SUCCESS = 0 + RD_FAILED = 1 + RD_INCOMPLETE = 2 + RD_NO_CONNECT = 3 + + started = time.time() + + try: + retval = run_rtmpdump(args) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + retval = RD_SUCCESS + self.to_screen('\n[rtmpdump] Interrupted by user') + + if retval == RD_NO_CONNECT: + self.report_error('[rtmpdump] Could not connect to RTMP server.') + return False + + while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: + prevsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) + time.sleep(5.0) # This seems to be needed + args = basic_args + ['--resume'] + if retval == RD_FAILED: + args += ['--skip', '1'] + args = [encodeArgument(a) for a in args] + retval = run_rtmpdump(args) + cursize = os.path.getsize(encodeFilename(tmpfilename)) + if prevsize == cursize and retval == RD_FAILED: + break + # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those + if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: + self.to_screen('[rtmpdump] Could not download the whole video. 
This can happen for some advertisements.') + retval = RD_SUCCESS + break + if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + 'elapsed': time.time() - started, + }, info_dict) + return True + else: + self.to_stderr('\n') + self.report_error('rtmpdump exited with code %d' % retval) + return False diff --git a/yt_dlp/downloader/rtsp.py b/yt_dlp/downloader/rtsp.py new file mode 100644 index 0000000..e89269f --- /dev/null +++ b/yt_dlp/downloader/rtsp.py @@ -0,0 +1,42 @@ +import os +import subprocess + +from .common import FileDownloader +from ..utils import check_executable, encodeFilename + + +class RtspFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + if check_executable('mplayer', ['-h']): + args = [ + 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', + '-dumpstream', '-dumpfile', tmpfilename, url] + elif check_executable('mpv', ['-h']): + args = [ + 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] + else: + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one') + return False + + self._debug_cmd(args) + + retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen(f'\r[{args[0]}] {fsize} bytes') + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }, info_dict) + return True + else: + self.to_stderr('\n') + self.report_error('%s exited with code %d' % (args[0], retval)) + return False diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py new file mode 100644 index 0000000..6837ff1 --- /dev/null +++ b/yt_dlp/downloader/websocket.py @@ -0,0 +1,53 @@ +import asyncio +import contextlib +import os +import signal +import threading + +from .common import FileDownloader +from .external import FFmpegFD +from ..dependencies import websockets + + +class FFmpegSinkFD(FileDownloader): + """ A sink to ffmpeg for downloading fragments in any form """ + + def real_download(self, filename, info_dict): + info_copy = info_dict.copy() + info_copy['url'] = '-' + + async def call_conn(proc, stdin): + try: + await self.real_connection(stdin, info_dict) + except OSError: + pass + finally: + with contextlib.suppress(OSError): + stdin.flush() + stdin.close() + os.kill(os.getpid(), signal.SIGINT) + + class FFmpegStdinFD(FFmpegFD): + @classmethod + def get_basename(cls): + return FFmpegFD.get_basename() + + def on_process_started(self, proc, stdin): + thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), )) + thread.start() + + return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy) + + async def real_connection(self, sink, info_dict): + """ Override this in subclasses """ + raise NotImplementedError('This method must be implemented by subclasses') + + +class WebSocketFragmentFD(FFmpegSinkFD): + async def real_connection(self, sink, info_dict): + async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws: + while 
True: + recv = await ws.recv() + if isinstance(recv, str): + recv = recv.encode('utf8') + sink.write(recv) diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py new file mode 100644 index 0000000..c7a8637 --- /dev/null +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -0,0 +1,228 @@ +import json +import time + +from .fragment import FragmentFD +from ..networking.exceptions import HTTPError +from ..utils import ( + RegexNotFoundError, + RetryManager, + dict_get, + int_or_none, + try_get, +) +from ..utils.networking import HTTPHeaderDict + + +class YoutubeLiveChatFD(FragmentFD): + """ Downloads YouTube live chats fragment by fragment """ + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': + self.report_warning('Live chat download runs until the livestream ends. ' + 'If you wish to download the video simultaneously, run a separate yt-dlp instance') + + test = self.params.get('test', False) + + ctx = { + 'filename': filename, + 'live': True, + 'total_frags': None, + } + + from ..extractor.youtube import YoutubeBaseInfoExtractor + + ie = YoutubeBaseInfoExtractor(self.ydl) + + start_time = int(time.time() * 1000) + + def dl_fragment(url, data=None, headers=None): + http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers) + return self._download_fragment(ctx, url, info_dict, http_headers, data) + + def parse_actions_replay(live_chat_continuation): + offset = continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + if 'replayChatItemAction' in action: + replay_chat_item_action = action['replayChatItemAction'] + offset = int(replay_chat_item_action['videoOffsetTimeMsec']) + processed_fragment.extend( + json.dumps(action, ensure_ascii=False).encode() + b'\n') + if offset is not None: + continuation = try_get( + live_chat_continuation, + lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict) + if continuation: + continuation_id = continuation.get('continuation') + click_tracking_params = continuation.get('clickTrackingParams') + self._append_fragment(ctx, processed_fragment) + return continuation_id, offset, click_tracking_params + + def try_refresh_replay_beginning(live_chat_continuation): + # choose the second option that contains the unfiltered live chat replay + refresh_continuation = try_get( + live_chat_continuation, + lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict) + if refresh_continuation: + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + refresh_continuation_id = refresh_continuation.get('continuation') + offset = 0 + click_tracking_params = refresh_continuation.get('trackingParams') + return refresh_continuation_id, offset, click_tracking_params + return parse_actions_replay(live_chat_continuation) + + live_offset = 0 + + def parse_actions_live(live_chat_continuation): + nonlocal live_offset + continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + timestamp = self.parse_live_timestamp(action) + if timestamp is not None: + live_offset = timestamp - start_time + # compatibility with replay format + pseudo_action = { + 
'replayChatItemAction': {'actions': [action]}, + 'videoOffsetTimeMsec': str(live_offset), + 'isLive': True, + } + processed_fragment.extend( + json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n') + continuation_data_getters = [ + lambda x: x['continuations'][0]['invalidationContinuationData'], + lambda x: x['continuations'][0]['timedContinuationData'], + ] + continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict) + if continuation_data: + continuation_id = continuation_data.get('continuation') + click_tracking_params = continuation_data.get('clickTrackingParams') + timeout_ms = int_or_none(continuation_data.get('timeoutMs')) + if timeout_ms is not None: + time.sleep(timeout_ms / 1000) + self._append_fragment(ctx, processed_fragment) + return continuation_id, live_offset, click_tracking_params + + def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): + for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index): + try: + success = dl_fragment(url, request_data, headers) + if not success: + return False, None, None, None + raw_fragment = self._read_fragment(ctx) + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + data = None + if not data: + data = json.loads(raw_fragment) + live_chat_continuation = try_get( + data, + lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} + + func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live + or frag_index == 1 and try_refresh_replay_beginning + or parse_actions_replay) + return (True, *func(live_chat_continuation)) + except HTTPError as err: + retry.error = err + continue + return False, None, None, None + + self._prepare_and_start_frag_download(ctx, info_dict) + + success = dl_fragment(info_dict['url']) + if not success: + return False + raw_fragment = self._read_fragment(ctx) + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + return False + continuation_id = try_get( + data, + lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + + ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace')) + + if not ytcfg: + return False + api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY']) + innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT']) + if not api_key or not innertube_context: + return False + visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) + if info_dict['protocol'] == 'youtube_live_chat_replay': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id + elif info_dict['protocol'] == 'youtube_live_chat': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id + + frag_index = offset = 0 + click_tracking_params = None + while continuation_id is not None: + frag_index += 1 + request_data = { + 'context': innertube_context, + 'continuation': continuation_id, + } + if frag_index > 1: + request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 
0))} + if click_tracking_params: + request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} + headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) + headers.update({'content-type': 'application/json'}) + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n' + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + url, frag_index, fragment_request_data, headers) + else: + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + chat_page_url, frag_index) + if not success: + return False + if test: + break + + return self._finish_frag_download(ctx, info_dict) + + @staticmethod + def parse_live_timestamp(action): + action_content = dict_get( + action, + ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand']) + if not isinstance(action_content, dict): + return None + item = dict_get(action_content, ['item', 'bannerRenderer']) + if not isinstance(item, dict): + return None + renderer = dict_get(item, [ + # text + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + # ticker + 'liveChatTickerPaidMessageItemRenderer', + 'liveChatTickerSponsorItemRenderer', + # banner + 'liveChatBannerRenderer', + ]) + if not isinstance(renderer, dict): + return None + parent_item_getters = [ + lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'], + lambda x: x['contents'], + ] + parent_item = try_get(renderer, parent_item_getters, dict) + if parent_item: + renderer = dict_get(parent_item, [ + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + ]) + if not isinstance(renderer, dict): + return None + return int_or_none(renderer.get('timestampUsec'), 1000) diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py new file mode 100644 index 0000000..6bfa4bd --- /dev/null +++ b/yt_dlp/extractor/__init__.py @@ -0,0 +1,42 @@ +from ..compat.compat_utils import passthrough_module + +passthrough_module(__name__, '.extractors') +del passthrough_module + + +def gen_extractor_classes(): + """ Return a list of supported extractors. + The order does matter; the first extractor matched is the one handling the URL. + """ + from .extractors import _ALL_CLASSES + + return _ALL_CLASSES + + +def gen_extractors(): + """ Return a list of an instance of every supported extractor. + The order does matter; the first extractor matched is the one handling the URL. + """ + return [klass() for klass in gen_extractor_classes()] + + +def list_extractor_classes(age_limit=None): + """Return a list of extractors that are suitable for the given age, sorted by extractor name""" + from .generic import GenericIE + + yield from sorted(filter( + lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, + gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) + yield GenericIE + + +def list_extractors(age_limit=None): + """Return a list of extractor instances that are suitable for the given age, sorted by extractor name""" + return [ie() for ie in list_extractor_classes(age_limit)] + + +def get_info_extractor(ie_name): + """Returns the info extractor class with the given ie_name""" + from . 
import extractors + + return getattr(extractors, f'{ie_name}IE') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py new file mode 100644 index 0000000..c753655 --- /dev/null +++ b/yt_dlp/extractor/_extractors.py @@ -0,0 +1,2493 @@ +# flake8: noqa: F401 + +from .youtube import ( # Youtube is moved to the top to improve performance + YoutubeIE, + YoutubeClipIE, + YoutubeFavouritesIE, + YoutubeNotificationsIE, + YoutubeHistoryIE, + YoutubeTabIE, + YoutubeLivestreamEmbedIE, + YoutubePlaylistIE, + YoutubeRecommendedIE, + YoutubeSearchDateIE, + YoutubeSearchIE, + YoutubeSearchURLIE, + YoutubeMusicSearchURLIE, + YoutubeSubscriptionsIE, + YoutubeTruncatedIDIE, + YoutubeTruncatedURLIE, + YoutubeYtBeIE, + YoutubeYtUserIE, + YoutubeWatchLaterIE, + YoutubeShortsAudioPivotIE, + YoutubeConsentRedirectIE, +) + +from .abc import ( + ABCIE, + ABCIViewIE, + ABCIViewShowSeriesIE, +) +from .abcnews import ( + AbcNewsIE, + AbcNewsVideoIE, +) +from .abcotvs import ( + ABCOTVSIE, + ABCOTVSClipsIE, +) +from .abematv import ( + AbemaTVIE, + AbemaTVTitleIE, +) +from .academicearth import AcademicEarthCourseIE +from .acast import ( + ACastIE, + ACastChannelIE, +) +from .acfun import AcFunVideoIE, AcFunBangumiIE +from .adn import ADNIE, ADNSeasonIE +from .adobeconnect import AdobeConnectIE +from .adobetv import ( + AdobeTVEmbedIE, + AdobeTVIE, + AdobeTVShowIE, + AdobeTVChannelIE, + AdobeTVVideoIE, +) +from .adultswim import AdultSwimIE +from .aenetworks import ( + AENetworksIE, + AENetworksCollectionIE, + AENetworksShowIE, + HistoryTopicIE, + HistoryPlayerIE, + BiographyIE, +) +from .aeonco import AeonCoIE +from .afreecatv import ( + AfreecaTVIE, + AfreecaTVLiveIE, + AfreecaTVUserIE, +) +from .agora import ( + TokFMAuditionIE, + TokFMPodcastIE, + WyborczaPodcastIE, + WyborczaVideoIE, +) +from .airtv import AirTVIE +from .aitube import AitubeKZVideoIE +from .aljazeera import AlJazeeraIE +from .allstar import ( + AllstarIE, + AllstarProfileIE, +) +from .alphaporno import AlphaPornoIE +from .altcensored import ( + AltCensoredIE, + AltCensoredChannelIE, +) +from .alura import ( + AluraIE, + AluraCourseIE +) +from .amadeustv import AmadeusTVIE +from .amara import AmaraIE +from .amcnetworks import AMCNetworksIE +from .amazon import ( + AmazonStoreIE, + AmazonReviewsIE, +) +from .amazonminitv import ( + AmazonMiniTVIE, + AmazonMiniTVSeasonIE, + AmazonMiniTVSeriesIE, +) +from .americastestkitchen import ( + AmericasTestKitchenIE, + AmericasTestKitchenSeasonIE, +) +from .anchorfm import AnchorFMEpisodeIE +from .angel import AngelIE +from .anvato import AnvatoIE +from .aol import AolIE +from .allocine import AllocineIE +from .aliexpress import AliExpressLiveIE +from .alsace20tv import ( + Alsace20TVIE, + Alsace20TVEmbedIE, +) +from .apa import APAIE +from .aparat import AparatIE +from .appleconnect import AppleConnectIE +from .appletrailers import ( + AppleTrailersIE, + AppleTrailersSectionIE, +) +from .applepodcasts import ApplePodcastsIE +from .archiveorg import ( + ArchiveOrgIE, + YoutubeWebArchiveIE, +) +from .arcpublishing import ArcPublishingIE +from .arkena import ArkenaIE +from .ard import ( + ARDBetaMediathekIE, + ARDMediathekCollectionIE, + ARDIE, +) +from .art19 import ( + Art19IE, + Art19ShowIE, +) +from .arte import ( + ArteTVIE, + ArteTVEmbedIE, + ArteTVPlaylistIE, + ArteTVCategoryIE, +) +from .arnes import ArnesIE +from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE +from .atresplayer import AtresPlayerIE +from .atscaleconf import AtScaleConfEventIE +from .atvat import 
ATVAtIE +from .audimedia import AudiMediaIE +from .audioboom import AudioBoomIE +from .audiodraft import ( + AudiodraftCustomIE, + AudiodraftGenericIE, +) +from .audiomack import AudiomackIE, AudiomackAlbumIE +from .audius import ( + AudiusIE, + AudiusTrackIE, + AudiusPlaylistIE, + AudiusProfileIE, +) +from .awaan import ( + AWAANIE, + AWAANVideoIE, + AWAANLiveIE, + AWAANSeasonIE, +) +from .axs import AxsIE +from .azmedien import AZMedienIE +from .baidu import BaiduVideoIE +from .banbye import ( + BanByeIE, + BanByeChannelIE, +) +from .bandaichannel import BandaiChannelIE +from .bandcamp import ( + BandcampIE, + BandcampAlbumIE, + BandcampWeeklyIE, + BandcampUserIE, +) +from .bannedvideo import BannedVideoIE +from .bbc import ( + BBCCoUkIE, + BBCCoUkArticleIE, + BBCCoUkIPlayerEpisodesIE, + BBCCoUkIPlayerGroupIE, + BBCCoUkPlaylistIE, + BBCIE, +) +from .beeg import BeegIE +from .behindkink import BehindKinkIE +from .bellmedia import BellMediaIE +from .beatbump import ( + BeatBumpVideoIE, + BeatBumpPlaylistIE, +) +from .beatport import BeatportIE +from .berufetv import BerufeTVIE +from .bet import BetIE +from .bfi import BFIPlayerIE +from .bfmtv import ( + BFMTVIE, + BFMTVLiveIE, + BFMTVArticleIE, +) +from .bibeltv import ( + BibelTVLiveIE, + BibelTVSeriesIE, + BibelTVVideoIE, +) +from .bigflix import BigflixIE +from .bigo import BigoIE +from .bild import BildIE +from .bilibili import ( + BiliBiliIE, + BiliBiliBangumiIE, + BiliBiliBangumiSeasonIE, + BiliBiliBangumiMediaIE, + BilibiliCheeseIE, + BilibiliCheeseSeasonIE, + BiliBiliSearchIE, + BilibiliCategoryIE, + BilibiliAudioIE, + BilibiliAudioAlbumIE, + BiliBiliPlayerIE, + BilibiliSpaceVideoIE, + BilibiliSpaceAudioIE, + BilibiliCollectionListIE, + BilibiliSeriesListIE, + BilibiliFavoritesListIE, + BilibiliWatchlaterIE, + BilibiliPlaylistIE, + BiliIntlIE, + BiliIntlSeriesIE, + BiliLiveIE, +) +from .biobiochiletv import BioBioChileTVIE +from .bitchute import ( + BitChuteIE, + BitChuteChannelIE, +) +from .blackboardcollaborate import BlackboardCollaborateIE +from .bleacherreport import ( + BleacherReportIE, + BleacherReportCMSIE, +) +from .blerp import BlerpIE +from .blogger import BloggerIE +from .bloomberg import BloombergIE +from .bokecc import BokeCCIE +from .bongacams import BongaCamsIE +from .boosty import BoostyIE +from .bostonglobe import BostonGlobeIE +from .box import BoxIE +from .boxcast import BoxCastVideoIE +from .bpb import BpbIE +from .br import BRIE +from .bravotv import BravoTVIE +from .brainpop import ( + BrainPOPIE, + BrainPOPJrIE, + BrainPOPELLIE, + BrainPOPEspIE, + BrainPOPFrIE, + BrainPOPIlIE, +) +from .breitbart import BreitBartIE +from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, +) +from .brilliantpala import ( + BrilliantpalaElearnIE, + BrilliantpalaClassesIE, +) +from .businessinsider import BusinessInsiderIE +from .bundesliga import BundesligaIE +from .bundestag import BundestagIE +from .buzzfeed import BuzzFeedIE +from .byutv import BYUtvIE +from .c56 import C56IE +from .cableav import CableAVIE +from .callin import CallinIE +from .caltrans import CaltransIE +from .cam4 import CAM4IE +from .camdemy import ( + CamdemyIE, + CamdemyFolderIE +) +from .camfm import ( + CamFMEpisodeIE, + CamFMShowIE +) +from .cammodels import CamModelsIE +from .camsoda import CamsodaIE +from .camtasia import CamtasiaEmbedIE +from .canal1 import Canal1IE +from .canalalpha import CanalAlphaIE +from .canalplus import CanalplusIE +from .canalc2 import Canalc2IE +from .caracoltv import CaracolTvPlayIE +from .cartoonnetwork 
import CartoonNetworkIE +from .cbc import ( + CBCIE, + CBCPlayerIE, + CBCPlayerPlaylistIE, + CBCGemIE, + CBCGemPlaylistIE, + CBCGemLiveIE, +) +from .cbs import ( + CBSIE, + ParamountPressExpressIE, +) +from .cbsnews import ( + CBSNewsEmbedIE, + CBSNewsIE, + CBSLocalIE, + CBSLocalArticleIE, + CBSLocalLiveIE, + CBSNewsLiveIE, + CBSNewsLiveVideoIE, +) +from .cbssports import ( + CBSSportsEmbedIE, + CBSSportsIE, + TwentyFourSevenSportsIE, +) +from .ccc import ( + CCCIE, + CCCPlaylistIE, +) +from .ccma import CCMAIE +from .cctv import CCTVIE +from .cda import CDAIE +from .cellebrite import CellebriteIE +from .ceskatelevize import CeskaTelevizeIE +from .cgtn import CGTNIE +from .charlierose import CharlieRoseIE +from .chaturbate import ChaturbateIE +from .chilloutzone import ChilloutzoneIE +from .chzzk import ( + CHZZKLiveIE, + CHZZKVideoIE, +) +from .cinemax import CinemaxIE +from .cinetecamilano import CinetecaMilanoIE +from .cineverse import ( + CineverseIE, + CineverseDetailsIE, +) +from .ciscolive import ( + CiscoLiveSessionIE, + CiscoLiveSearchIE, +) +from .ciscowebex import CiscoWebexIE +from .cjsw import CJSWIE +from .clipchamp import ClipchampIE +from .clippit import ClippitIE +from .cliprs import ClipRsIE +from .closertotruth import CloserToTruthIE +from .cloudflarestream import CloudflareStreamIE +from .cloudycdn import CloudyCDNIE +from .clubic import ClubicIE +from .clyp import ClypIE +from .cmt import CMTIE +from .cnbc import ( + CNBCVideoIE, +) +from .cnn import ( + CNNIE, + CNNBlogsIE, + CNNArticleIE, + CNNIndonesiaIE, +) +from .coub import CoubIE +from .comedycentral import ( + ComedyCentralIE, + ComedyCentralTVIE, +) +from .commonmistakes import CommonMistakesIE, UnicodeBOMIE +from .commonprotocols import ( + MmsIE, + RtmpIE, + ViewSourceIE, +) +from .condenast import CondeNastIE +from .contv import CONtvIE +from .corus import CorusIE +from .cpac import ( + CPACIE, + CPACPlaylistIE, +) +from .cozytv import CozyTVIE +from .cracked import CrackedIE +from .crackle import CrackleIE +from .craftsy import CraftsyIE +from .crooksandliars import CrooksAndLiarsIE +from .crowdbunker import ( + CrowdBunkerIE, + CrowdBunkerChannelIE, +) +from .crtvg import CrtvgIE +from .crunchyroll import ( + CrunchyrollBetaIE, + CrunchyrollBetaShowIE, + CrunchyrollMusicIE, + CrunchyrollArtistIE, +) +from .cspan import CSpanIE, CSpanCongressIE +from .ctsnews import CtsNewsIE +from .ctv import CTVIE +from .ctvnews import CTVNewsIE +from .cultureunplugged import CultureUnpluggedIE +from .curiositystream import ( + CuriosityStreamIE, + CuriosityStreamCollectionsIE, + CuriosityStreamSeriesIE, +) +from .cwtv import CWTVIE +from .cybrary import ( + CybraryIE, + CybraryCourseIE +) +from .dacast import ( + DacastVODIE, + DacastPlaylistIE, +) +from .dailymail import DailyMailIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionSearchIE, + DailymotionUserIE, +) +from .dailywire import ( + DailyWireIE, + DailyWirePodcastIE, +) +from .damtomo import ( + DamtomoRecordIE, + DamtomoVideoIE, +) +from .daum import ( + DaumIE, + DaumClipIE, + DaumPlaylistIE, + DaumUserIE, +) +from .daystar import DaystarClipIE +from .dbtv import DBTVIE +from .dctp import DctpTvIE +from .deezer import ( + DeezerPlaylistIE, + DeezerAlbumIE, +) +from .democracynow import DemocracynowIE +from .detik import DetikEmbedIE +from .dlf import ( + DLFIE, + DLFCorpusIE, +) +from .dfb import DFBIE +from .dhm import DHMIE +from .douyutv import ( + DouyuShowIE, + DouyuTVIE, +) +from .dplay import ( + DPlayIE, + 
DiscoveryPlusIE, + HGTVDeIE, + GoDiscoveryIE, + TravelChannelIE, + CookingChannelIE, + HGTVUsaIE, + FoodNetworkIE, + InvestigationDiscoveryIE, + DestinationAmericaIE, + AmHistoryChannelIE, + ScienceChannelIE, + DIYNetworkIE, + DiscoveryLifeIE, + AnimalPlanetIE, + TLCIE, + MotorTrendIE, + MotorTrendOnDemandIE, + DiscoveryPlusIndiaIE, + DiscoveryNetworksDeIE, + DiscoveryPlusItalyIE, + DiscoveryPlusItalyShowIE, + DiscoveryPlusIndiaShowIE, + GlobalCyclingNetworkPlusIE, +) +from .dreisat import DreiSatIE +from .drbonanza import DRBonanzaIE +from .drtuber import DrTuberIE +from .drtv import ( + DRTVIE, + DRTVLiveIE, + DRTVSeasonIE, + DRTVSeriesIE, +) +from .dtube import DTubeIE +from .dvtv import DVTVIE +from .duboku import ( + DubokuIE, + DubokuPlaylistIE +) +from .dumpert import DumpertIE +from .deuxm import ( + DeuxMIE, + DeuxMNewsIE +) +from .digitalconcerthall import DigitalConcertHallIE +from .discogs import DiscogsReleasePlaylistIE +from .discovery import DiscoveryIE +from .disney import DisneyIE +from .dispeak import DigitallySpeakingIE +from .dropbox import DropboxIE +from .dropout import ( + DropoutSeasonIE, + DropoutIE +) +from .duoplay import DuoplayIE +from .dw import ( + DWIE, + DWArticleIE, +) +from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE +from .ebaumsworld import EbaumsWorldIE +from .ebay import EbayIE +from .egghead import ( + EggheadCourseIE, + EggheadLessonIE, +) +from .eighttracks import EightTracksIE +from .einthusan import EinthusanIE +from .eitb import EitbIE +from .elementorembed import ElementorEmbedIE +from .elonet import ElonetIE +from .elpais import ElPaisIE +from .eltrecetv import ElTreceTVIE +from .embedly import EmbedlyIE +from .epicon import ( + EpiconIE, + EpiconSeriesIE, +) +from .epidemicsound import EpidemicSoundIE +from .eplus import EplusIbIE +from .epoch import EpochIE +from .eporner import EpornerIE +from .erocast import ErocastIE +from .eroprofile import ( + EroProfileIE, + EroProfileAlbumIE, +) +from .err import ERRJupiterIE +from .ertgr import ( + ERTFlixCodenameIE, + ERTFlixIE, + ERTWebtvEmbedIE, +) +from .espn import ( + ESPNIE, + WatchESPNIE, + ESPNArticleIE, + FiveThirtyEightIE, + ESPNCricInfoIE, +) +from .ettutv import EttuTvIE +from .europa import EuropaIE, EuroParlWebstreamIE +from .europeantour import EuropeanTourIE +from .eurosport import EurosportIE +from .euscreen import EUScreenIE +from .expressen import ExpressenIE +from .eyedotv import EyedoTVIE +from .facebook import ( + FacebookIE, + FacebookPluginsVideoIE, + FacebookRedirectURLIE, + FacebookReelIE, + FacebookAdsIE, +) +from .fancode import ( + FancodeVodIE, + FancodeLiveIE +) + +from .faz import FazIE +from .fc2 import ( + FC2IE, + FC2EmbedIE, + FC2LiveIE, +) +from .fczenit import FczenitIE +from .fifa import FifaIE +from .filmon import ( + FilmOnIE, + FilmOnChannelIE, +) +from .filmweb import FilmwebIE +from .firsttv import FirstTVIE +from .fivetv import FiveTVIE +from .flextv import FlexTVIE +from .flickr import FlickrIE +from .floatplane import ( + FloatplaneIE, + FloatplaneChannelIE, +) +from .folketinget import FolketingetIE +from .footyroom import FootyRoomIE +from .formula1 import Formula1IE +from .fourtube import ( + FourTubeIE, + PornTubeIE, + PornerBrosIE, + FuxIE, +) +from .fox import FOXIE +from .fox9 import ( + FOX9IE, + FOX9NewsIE, +) +from .foxnews import ( + FoxNewsIE, + FoxNewsArticleIE, + FoxNewsVideoIE, +) +from .foxsports import FoxSportsIE +from .fptplay import FptplayIE +from .franceinter import FranceInterIE +from .francetv import ( + FranceTVIE, + 
FranceTVSiteIE, + FranceTVInfoIE, +) +from .freesound import FreesoundIE +from .freespeech import FreespeechIE +from .frontendmasters import ( + FrontendMastersIE, + FrontendMastersLessonIE, + FrontendMastersCourseIE +) +from .freetv import ( + FreeTvIE, + FreeTvMoviesIE, +) +from .fujitv import FujiTVFODPlus7IE +from .funimation import ( + FunimationIE, + FunimationPageIE, + FunimationShowIE, +) +from .funk import FunkIE +from .funker530 import Funker530IE +from .fuyintv import FuyinTVIE +from .gab import ( + GabTVIE, + GabIE, +) +from .gaia import GaiaIE +from .gamejolt import ( + GameJoltIE, + GameJoltUserIE, + GameJoltGameIE, + GameJoltGameSoundtrackIE, + GameJoltCommunityIE, + GameJoltSearchIE, +) +from .gamespot import GameSpotIE +from .gamestar import GameStarIE +from .gaskrank import GaskrankIE +from .gazeta import GazetaIE +from .gdcvault import GDCVaultIE +from .gedidigital import GediDigitalIE +from .generic import GenericIE +from .genius import ( + GeniusIE, + GeniusLyricsIE, +) +from .getcourseru import ( + GetCourseRuPlayerIE, + GetCourseRuIE +) +from .gettr import ( + GettrIE, + GettrStreamingIE, +) +from .giantbomb import GiantBombIE +from .glide import GlideIE +from .globalplayer import ( + GlobalPlayerLiveIE, + GlobalPlayerLivePlaylistIE, + GlobalPlayerAudioIE, + GlobalPlayerAudioEpisodeIE, + GlobalPlayerVideoIE +) +from .globo import ( + GloboIE, + GloboArticleIE, +) +from .gmanetwork import GMANetworkVideoIE +from .go import GoIE +from .godtube import GodTubeIE +from .gofile import GofileIE +from .golem import GolemIE +from .goodgame import GoodGameIE +from .googledrive import ( + GoogleDriveIE, + GoogleDriveFolderIE, +) +from .googlepodcasts import ( + GooglePodcastsIE, + GooglePodcastsFeedIE, +) +from .googlesearch import GoogleSearchIE +from .gopro import GoProIE +from .goplay import GoPlayIE +from .goshgay import GoshgayIE +from .gotostage import GoToStageIE +from .gputechconf import GPUTechConfIE +from .gronkh import ( + GronkhIE, + GronkhFeedIE, + GronkhVodsIE +) +from .groupon import GrouponIE +from .harpodeon import HarpodeonIE +from .hbo import HBOIE +from .hearthisat import HearThisAtIE +from .heise import HeiseIE +from .hellporno import HellPornoIE +from .hgtv import HGTVComShowIE +from .hketv import HKETVIE +from .hidive import HiDiveIE +from .historicfilms import HistoricFilmsIE +from .hitrecord import HitRecordIE +from .hollywoodreporter import ( + HollywoodReporterIE, + HollywoodReporterPlaylistIE, +) +from .holodex import HolodexIE +from .hotnewhiphop import HotNewHipHopIE +from .hotstar import ( + HotStarIE, + HotStarPrefixIE, + HotStarPlaylistIE, + HotStarSeasonIE, + HotStarSeriesIE, +) +from .hrefli import HrefLiRedirectIE +from .hrfensehen import HRFernsehenIE +from .hrti import ( + HRTiIE, + HRTiPlaylistIE, +) +from .hse import ( + HSEShowIE, + HSEProductIE, +) +from .genericembeds import ( + HTML5MediaEmbedIE, + QuotedHTMLIE, +) +from .huajiao import HuajiaoIE +from .huya import HuyaLiveIE +from .huffpost import HuffPostIE +from .hungama import ( + HungamaIE, + HungamaSongIE, + HungamaAlbumPlaylistIE, +) +from .hypem import HypemIE +from .hypergryph import MonsterSirenHypergryphMusicIE +from .hytale import HytaleIE +from .icareus import IcareusIE +from .ichinanalive import ( + IchinanaLiveIE, + IchinanaLiveClipIE, +) +from .idolplus import IdolPlusIE +from .ign import ( + IGNIE, + IGNVideoIE, + IGNArticleIE, +) +from .iheart import ( + IHeartRadioIE, + IHeartRadioPodcastIE, +) +from .ilpost import IlPostIE +from .iltalehti import IltalehtiIE +from 
.imdb import ( + ImdbIE, + ImdbListIE +) +from .imgur import ( + ImgurIE, + ImgurAlbumIE, + ImgurGalleryIE, +) +from .ina import InaIE +from .inc import IncIE +from .indavideo import IndavideoEmbedIE +from .infoq import InfoQIE +from .instagram import ( + InstagramIE, + InstagramIOSIE, + InstagramUserIE, + InstagramTagIE, + InstagramStoryIE, +) +from .internazionale import InternazionaleIE +from .internetvideoarchive import InternetVideoArchiveIE +from .iprima import ( + IPrimaIE, + IPrimaCNNIE +) +from .iqiyi import ( + IqiyiIE, + IqIE, + IqAlbumIE +) +from .islamchannel import ( + IslamChannelIE, + IslamChannelSeriesIE, +) +from .israelnationalnews import IsraelNationalNewsIE +from .itprotv import ( + ITProTVIE, + ITProTVCourseIE +) +from .itv import ( + ITVIE, + ITVBTCCIE, +) +from .ivi import ( + IviIE, + IviCompilationIE +) +from .ivideon import IvideonIE +from .iwara import ( + IwaraIE, + IwaraPlaylistIE, + IwaraUserIE, +) +from .ixigua import IxiguaIE +from .izlesene import IzleseneIE +from .jable import ( + JableIE, + JablePlaylistIE, +) +from .jamendo import ( + JamendoIE, + JamendoAlbumIE, +) +from .japandiet import ( + ShugiinItvLiveIE, + ShugiinItvLiveRoomIE, + ShugiinItvVodIE, + SangiinInstructionIE, + SangiinIE, +) +from .jeuxvideo import JeuxVideoIE +from .jiosaavn import ( + JioSaavnSongIE, + JioSaavnAlbumIE, +) +from .jove import JoveIE +from .joj import JojIE +from .joqrag import JoqrAgIE +from .jstream import JStreamIE +from .jtbc import ( + JTBCIE, + JTBCProgramIE, +) +from .jwplatform import JWPlatformIE +from .kakao import KakaoIE +from .kaltura import KalturaIE +from .kankanews import KankaNewsIE +from .karaoketv import KaraoketvIE +from .kelbyone import KelbyOneIE +from .khanacademy import ( + KhanAcademyIE, + KhanAcademyUnitIE, +) +from .kick import ( + KickIE, + KickVODIE, +) +from .kicker import KickerIE +from .kickstarter import KickStarterIE +from .kinja import KinjaEmbedIE +from .kinopoisk import KinoPoiskIE +from .kommunetv import KommunetvIE +from .kompas import KompasVideoIE +from .koo import KooIE +from .kth import KTHIE +from .krasview import KrasViewIE +from .ku6 import Ku6IE +from .kukululive import KukuluLiveIE +from .kuwo import ( + KuwoIE, + KuwoAlbumIE, + KuwoChartIE, + KuwoSingerIE, + KuwoCategoryIE, + KuwoMvIE, +) +from .la7 import ( + LA7IE, + LA7PodcastEpisodeIE, + LA7PodcastIE, +) +from .lastfm import ( + LastFMIE, + LastFMPlaylistIE, + LastFMUserIE, +) +from .laxarxames import LaXarxaMesIE +from .lbry import ( + LBRYIE, + LBRYChannelIE, + LBRYPlaylistIE, +) +from .lci import LCIIE +from .lcp import ( + LcpPlayIE, + LcpIE, +) +from .lecture2go import Lecture2GoIE +from .lecturio import ( + LecturioIE, + LecturioCourseIE, + LecturioDeCourseIE, +) +from .leeco import ( + LeIE, + LePlaylistIE, + LetvCloudIE, +) +from .lefigaro import ( + LeFigaroVideoEmbedIE, + LeFigaroVideoSectionIE, +) +from .lego import LEGOIE +from .lemonde import LemondeIE +from .lenta import LentaIE +from .libraryofcongress import LibraryOfCongressIE +from .libsyn import LibsynIE +from .lifenews import ( + LifeNewsIE, + LifeEmbedIE, +) +from .likee import ( + LikeeIE, + LikeeUserIE +) +from .limelight import ( + LimelightMediaIE, + LimelightChannelIE, + LimelightChannelListIE, +) +from .linkedin import ( + LinkedInIE, + LinkedInLearningIE, + LinkedInLearningCourseIE, +) +from .liputan6 import Liputan6IE +from .listennotes import ListenNotesIE +from .litv import LiTVIE +from .livejournal import LiveJournalIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + 
LivestreamShortenerIE, +) +from .livestreamfails import LivestreamfailsIE +from .lnkgo import ( + LnkGoIE, + LnkIE, +) +from .lovehomeporn import LoveHomePornIE +from .lrt import ( + LRTVODIE, + LRTStreamIE +) +from .lsm import ( + LSMLREmbedIE, + LSMLTVEmbedIE, + LSMReplayIE +) +from .lumni import ( + LumniIE +) +from .lynda import ( + LyndaIE, + LyndaCourseIE +) +from .maariv import MaarivIE +from .magellantv import MagellanTVIE +from .magentamusik import MagentaMusikIE +from .mailru import ( + MailRuIE, + MailRuMusicIE, + MailRuMusicSearchIE, +) +from .mainstreaming import MainStreamingIE +from .mangomolo import ( + MangomoloVideoIE, + MangomoloLiveIE, +) +from .manoto import ( + ManotoTVIE, + ManotoTVShowIE, + ManotoTVLiveIE, +) +from .manyvids import ManyVidsIE +from .maoritv import MaoriTVIE +from .markiza import ( + MarkizaIE, + MarkizaPageIE, +) +from .massengeschmacktv import MassengeschmackTVIE +from .masters import MastersIE +from .matchtv import MatchTVIE +from .mbn import MBNIE +from .mdr import MDRIE +from .medaltv import MedalTVIE +from .mediaite import MediaiteIE +from .mediaklikk import MediaKlikkIE +from .mediaset import ( + MediasetIE, + MediasetShowIE, +) +from .mediasite import ( + MediasiteIE, + MediasiteCatalogIE, + MediasiteNamedCatalogIE, +) +from .mediastream import ( + MediaStreamIE, + WinSportsVideoIE, +) +from .mediaworksnz import MediaWorksNZVODIE +from .medici import MediciIE +from .megaphone import MegaphoneIE +from .meipai import MeipaiIE +from .melonvod import MelonVODIE +from .metacritic import MetacriticIE +from .mgtv import MGTVIE +from .microsoftstream import MicrosoftStreamIE +from .microsoftvirtualacademy import ( + MicrosoftVirtualAcademyIE, + MicrosoftVirtualAcademyCourseIE, +) +from .microsoftembed import MicrosoftEmbedIE +from .mildom import ( + MildomIE, + MildomVodIE, + MildomClipIE, + MildomUserVodIE, +) +from .minds import ( + MindsIE, + MindsChannelIE, + MindsGroupIE, +) +from .minoto import MinotoIE +from .mirrativ import ( + MirrativIE, + MirrativUserIE, +) +from .mirrorcouk import MirrorCoUKIE +from .mit import TechTVMITIE, OCWMITIE +from .mitele import MiTeleIE +from .mixch import ( + MixchIE, + MixchArchiveIE, +) +from .mixcloud import ( + MixcloudIE, + MixcloudUserIE, + MixcloudPlaylistIE, +) +from .mlb import ( + MLBIE, + MLBVideoIE, + MLBTVIE, + MLBArticleIE, +) +from .mlssoccer import MLSSoccerIE +from .mocha import MochaVideoIE +from .mojvideo import MojvideoIE +from .monstercat import MonstercatIE +from .motherless import ( + MotherlessIE, + MotherlessGroupIE, + MotherlessGalleryIE, + MotherlessUploaderIE, +) +from .motorsport import MotorsportIE +from .moviepilot import MoviepilotIE +from .moview import MoviewPlayIE +from .moviezine import MoviezineIE +from .movingimage import MovingImageIE +from .msn import MSNIE +from .mtv import ( + MTVIE, + MTVVideoIE, + MTVServicesEmbeddedIE, + MTVDEIE, + MTVJapanIE, + MTVItaliaIE, + MTVItaliaProgrammaIE, +) +from .muenchentv import MuenchenTVIE +from .murrtube import MurrtubeIE, MurrtubeUserIE +from .museai import MuseAIIE +from .musescore import MuseScoreIE +from .musicdex import ( + MusicdexSongIE, + MusicdexAlbumIE, + MusicdexArtistIE, + MusicdexPlaylistIE, +) +from .mx3 import ( + Mx3IE, + Mx3NeoIE, + Mx3VolksmusikIE, +) +from .mxplayer import ( + MxplayerIE, + MxplayerShowIE, +) +from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspass import MySpassIE +from .myvideoge import MyVideoGeIE +from .myvidster import MyVidsterIE +from .mzaalo import MzaaloIE +from .n1 import ( + 
N1InfoAssetIE, + N1InfoIIE, +) +from .nate import ( + NateIE, + NateProgramIE, +) +from .nationalgeographic import ( + NationalGeographicVideoIE, + NationalGeographicTVIE, +) +from .naver import ( + NaverIE, + NaverLiveIE, + NaverNowIE, +) +from .nba import ( + NBAWatchEmbedIE, + NBAWatchIE, + NBAWatchCollectionIE, + NBAEmbedIE, + NBAIE, + NBAChannelIE, +) +from .nbc import ( + NBCIE, + NBCNewsIE, + NBCOlympicsIE, + NBCOlympicsStreamIE, + NBCSportsIE, + NBCSportsStreamIE, + NBCSportsVPlayerIE, + NBCStationsIE, +) +from .ndr import ( + NDRIE, + NJoyIE, + NDREmbedBaseIE, + NDREmbedIE, + NJoyEmbedIE, +) +from .ndtv import NDTVIE +from .nebula import ( + NebulaIE, + NebulaClassIE, + NebulaSubscriptionsIE, + NebulaChannelIE, +) +from .nekohacker import NekoHackerIE +from .nerdcubed import NerdCubedFeedIE +from .netzkino import NetzkinoIE +from .neteasemusic import ( + NetEaseMusicIE, + NetEaseMusicAlbumIE, + NetEaseMusicSingerIE, + NetEaseMusicListIE, + NetEaseMusicMvIE, + NetEaseMusicProgramIE, + NetEaseMusicDjRadioIE, +) +from .netverse import ( + NetverseIE, + NetversePlaylistIE, + NetverseSearchIE, +) +from .newgrounds import ( + NewgroundsIE, + NewgroundsPlaylistIE, + NewgroundsUserIE, +) +from .newspicks import NewsPicksIE +from .newsy import NewsyIE +from .nextmedia import ( + NextMediaIE, + NextMediaActionNewsIE, + AppleDailyIE, + NextTVIE, +) +from .nexx import ( + NexxIE, + NexxEmbedIE, +) +from .nfb import ( + NFBIE, + NFBSeriesIE, +) +from .nfhsnetwork import NFHSNetworkIE +from .nfl import ( + NFLIE, + NFLArticleIE, + NFLPlusEpisodeIE, + NFLPlusReplayIE, +) +from .nhk import ( + NhkVodIE, + NhkVodProgramIE, + NhkForSchoolBangumiIE, + NhkForSchoolSubjectIE, + NhkForSchoolProgramListIE, + NhkRadioNewsPageIE, + NhkRadiruIE, + NhkRadiruLiveIE, +) +from .nhl import NHLIE +from .nick import ( + NickIE, + NickBrIE, + NickDeIE, + NickRuIE, +) +from .niconico import ( + NiconicoIE, + NiconicoPlaylistIE, + NiconicoUserIE, + NiconicoSeriesIE, + NiconicoHistoryIE, + NicovideoSearchDateIE, + NicovideoSearchIE, + NicovideoSearchURLIE, + NicovideoTagURLIE, + NiconicoLiveIE, +) +from .ninaprotocol import NinaProtocolIE +from .ninecninemedia import ( + NineCNineMediaIE, + CPTwentyFourIE, +) +from .niconicochannelplus import ( + NiconicoChannelPlusIE, + NiconicoChannelPlusChannelVideosIE, + NiconicoChannelPlusChannelLivesIE, +) +from .ninegag import NineGagIE +from .ninenews import NineNewsIE +from .ninenow import NineNowIE +from .nintendo import NintendoIE +from .nitter import NitterIE +from .nobelprize import NobelPrizeIE +from .noice import NoicePodcastIE +from .nonktube import NonkTubeIE +from .noodlemagazine import NoodleMagazineIE +from .noovo import NoovoIE +from .nosnl import NOSNLArticleIE +from .nova import ( + NovaEmbedIE, + NovaIE, +) +from .novaplay import NovaPlayIE +from .nowness import ( + NownessIE, + NownessPlaylistIE, + NownessSeriesIE, +) +from .noz import NozIE +from .npo import ( + AndereTijdenIE, + NPOIE, + NPOLiveIE, + NPORadioIE, + NPORadioFragmentIE, + SchoolTVIE, + HetKlokhuisIE, + VPROIE, + WNLIE, +) +from .npr import NprIE +from .nrk import ( + NRKIE, + NRKPlaylistIE, + NRKSkoleIE, + NRKTVIE, + NRKTVDirekteIE, + NRKRadioPodkastIE, + NRKTVEpisodeIE, + NRKTVEpisodesIE, + NRKTVSeasonIE, + NRKTVSeriesIE, +) +from .nrl import NRLTVIE +from .ntvcojp import NTVCoJpCUIE +from .ntvde import NTVDeIE +from .ntvru import NTVRuIE +from .nubilesporn import NubilesPornIE +from .nytimes import ( + NYTimesIE, + NYTimesArticleIE, + NYTimesCookingIE, + NYTimesCookingRecipeIE, +) +from 
.nuum import ( + NuumLiveIE, + NuumTabIE, + NuumMediaIE, +) +from .nuvid import NuvidIE +from .nzherald import NZHeraldIE +from .nzonscreen import NZOnScreenIE +from .nzz import NZZIE +from .odkmedia import OnDemandChinaEpisodeIE +from .odnoklassniki import OdnoklassnikiIE +from .oftv import ( + OfTVIE, + OfTVPlaylistIE +) +from .oktoberfesttv import OktoberfestTVIE +from .olympics import OlympicsReplayIE +from .on24 import On24IE +from .ondemandkorea import ( + OnDemandKoreaIE, + OnDemandKoreaProgramIE, +) +from .onefootball import OneFootballIE +from .onenewsnz import OneNewsNZIE +from .oneplace import OnePlacePodcastIE +from .onet import ( + OnetIE, + OnetChannelIE, + OnetMVPIE, + OnetPlIE, +) +from .onionstudios import OnionStudiosIE +from .opencast import ( + OpencastIE, + OpencastPlaylistIE, +) +from .openrec import ( + OpenRecIE, + OpenRecCaptureIE, + OpenRecMovieIE, +) +from .ora import OraTVIE +from .orf import ( + ORFTVthekIE, + ORFFM4StoryIE, + ORFONIE, + ORFRadioIE, + ORFPodcastIE, + ORFIPTVIE, +) +from .outsidetv import OutsideTVIE +from .owncloud import OwnCloudIE +from .packtpub import ( + PacktPubIE, + PacktPubCourseIE, +) +from .palcomp3 import ( + PalcoMP3IE, + PalcoMP3ArtistIE, + PalcoMP3VideoIE, +) +from .panopto import ( + PanoptoIE, + PanoptoListIE, + PanoptoPlaylistIE +) +from .paramountplus import ( + ParamountPlusIE, + ParamountPlusSeriesIE, +) +from .parler import ParlerIE +from .parlview import ParlviewIE +from .patreon import ( + PatreonIE, + PatreonCampaignIE +) +from .pbs import PBSIE, PBSKidsIE +from .pearvideo import PearVideoIE +from .peekvids import PeekVidsIE, PlayVidsIE +from .peertube import ( + PeerTubeIE, + PeerTubePlaylistIE, +) +from .peertv import PeerTVIE +from .peloton import ( + PelotonIE, + PelotonLiveIE +) +from .performgroup import PerformGroupIE +from .periscope import ( + PeriscopeIE, + PeriscopeUserIE, +) +from .pgatour import PGATourIE +from .philharmoniedeparis import PhilharmonieDeParisIE +from .phoenix import PhoenixIE +from .photobucket import PhotobucketIE +from .piapro import PiaproIE +from .piaulizaportal import PIAULIZAPortalIE +from .picarto import ( + PicartoIE, + PicartoVodIE, +) +from .piksel import PikselIE +from .pinkbike import PinkbikeIE +from .pinterest import ( + PinterestIE, + PinterestCollectionIE, +) +from .pixivsketch import ( + PixivSketchIE, + PixivSketchUserIE, +) +from .pladform import PladformIE +from .planetmarathi import PlanetMarathiIE +from .platzi import ( + PlatziIE, + PlatziCourseIE, +) +from .playplustv import PlayPlusTVIE +from .playsuisse import PlaySuisseIE +from .playtvak import PlaytvakIE +from .playwire import PlaywireIE +from .plutotv import PlutoTVIE +from .pluralsight import ( + PluralsightIE, + PluralsightCourseIE, +) +from .podbayfm import PodbayFMIE, PodbayFMChannelIE +from .podchaser import PodchaserIE +from .podomatic import PodomaticIE +from .pokemon import ( + PokemonIE, + PokemonWatchIE, +) +from .pokergo import ( + PokerGoIE, + PokerGoCollectionIE, +) +from .polsatgo import PolsatGoIE +from .polskieradio import ( + PolskieRadioIE, + PolskieRadioLegacyIE, + PolskieRadioAuditionIE, + PolskieRadioCategoryIE, + PolskieRadioPlayerIE, + PolskieRadioPodcastIE, + PolskieRadioPodcastListIE, +) +from .popcorntimes import PopcorntimesIE +from .popcorntv import PopcornTVIE +from .porn91 import Porn91IE +from .pornbox import PornboxIE +from .pornflip import PornFlipIE +from .pornhub import ( + PornHubIE, + PornHubUserIE, + PornHubPlaylistIE, + PornHubPagedVideoListIE, + PornHubUserVideosUploadIE, +) 
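The registry module `yt_dlp/extractor/__init__.py` added earlier in this patch is the intended consumer of this import list: `passthrough_module` re-exports everything in `.extractors` lazily, and the helpers resolve names only at call time. A minimal usage sketch follows (illustrative only; it assumes this yt-dlp tree is importable and uses nothing beyond the helpers defined in that module):

    from yt_dlp.extractor import get_info_extractor, list_extractor_classes

    # get_info_extractor('Youtube') is getattr(extractors, 'Youtube' + 'IE')
    YoutubeIE = get_info_extractor('Youtube')
    print(YoutubeIE.suitable('https://www.youtube.com/watch?v=BaW_jenozKc'))  # True

    # Name-sorted extractors suitable for the age limit; GenericIE is always yielded last
    for ie in list_extractor_classes(age_limit=17):
        print(ie.IE_NAME)
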
+from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE +from .pornoxo import PornoXOIE +from .puhutv import ( + PuhuTVIE, + PuhuTVSerieIE, +) +from .pr0gramm import Pr0grammIE +from .prankcast import PrankCastIE, PrankCastPostIE +from .premiershiprugby import PremiershipRugbyIE +from .presstv import PressTVIE +from .projectveritas import ProjectVeritasIE +from .prosiebensat1 import ProSiebenSat1IE +from .prx import ( + PRXStoryIE, + PRXSeriesIE, + PRXAccountIE, + PRXStoriesSearchIE, + PRXSeriesSearchIE +) +from .puls4 import Puls4IE +from .pyvideo import PyvideoIE +from .qdance import QDanceIE +from .qingting import QingTingIE +from .qqmusic import ( + QQMusicIE, + QQMusicSingerIE, + QQMusicAlbumIE, + QQMusicToplistIE, + QQMusicPlaylistIE, +) +from .r7 import ( + R7IE, + R7ArticleIE, +) +from .radiko import RadikoIE, RadikoRadioIE +from .radiocanada import ( + RadioCanadaIE, + RadioCanadaAudioVideoIE, +) +from .radiocomercial import ( + RadioComercialIE, + RadioComercialPlaylistIE, +) +from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE +from .radiofrance import ( + FranceCultureIE, + RadioFranceIE, + RadioFranceLiveIE, + RadioFrancePodcastIE, + RadioFranceProfileIE, + RadioFranceProgramScheduleIE, +) +from .radiozet import RadioZetPodcastIE +from .radiokapital import ( + RadioKapitalIE, + RadioKapitalShowIE, +) +from .radlive import ( + RadLiveIE, + RadLiveChannelIE, + RadLiveSeasonIE, +) +from .rai import ( + RaiIE, + RaiCulturaIE, + RaiPlayIE, + RaiPlayLiveIE, + RaiPlayPlaylistIE, + RaiPlaySoundIE, + RaiPlaySoundLiveIE, + RaiPlaySoundPlaylistIE, + RaiNewsIE, + RaiSudtirolIE, +) +from .raywenderlich import ( + RayWenderlichIE, + RayWenderlichCourseIE, +) +from .rbgtum import ( + RbgTumIE, + RbgTumCourseIE, + RbgTumNewCourseIE, +) +from .rcs import ( + RCSIE, + RCSEmbedsIE, + RCSVariousIE, +) +from .rcti import ( + RCTIPlusIE, + RCTIPlusSeriesIE, + RCTIPlusTVIE, +) +from .rds import RDSIE +from .redbee import ParliamentLiveUKIE, RTBFIE +from .redbulltv import ( + RedBullTVIE, + RedBullEmbedIE, + RedBullTVRrnContentIE, + RedBullIE, +) +from .reddit import RedditIE +from .redge import RedCDNLivxIE +from .redgifs import ( + RedGifsIE, + RedGifsSearchIE, + RedGifsUserIE, +) +from .redtube import RedTubeIE +from .rentv import ( + RENTVIE, + RENTVArticleIE, +) +from .restudy import RestudyIE +from .reuters import ReutersIE +from .reverbnation import ReverbNationIE +from .rheinmaintv import RheinMainTVIE +from .ridehome import RideHomeIE +from .rinsefm import ( + RinseFMIE, + RinseFMArtistPlaylistIE, +) +from .rmcdecouverte import RMCDecouverteIE +from .rockstargames import RockstarGamesIE +from .rokfin import ( + RokfinIE, + RokfinStackIE, + RokfinChannelIE, + RokfinSearchIE, +) +from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE +from .rottentomatoes import RottenTomatoesIE +from .rozhlas import ( + RozhlasIE, + RozhlasVltavaIE, + MujRozhlasIE, +) +from .rte import RteIE, RteRadioIE +from .rtlnl import ( + RtlNlIE, + RTLLuTeleVODIE, + RTLLuArticleIE, + RTLLuLiveIE, + RTLLuRadioIE, +) +from .rtl2 import RTL2IE +from .rtnews import ( + RTNewsIE, + RTDocumentryIE, + RTDocumentryPlaylistIE, + RuptlyIE, +) +from .rtp import RTPIE +from .rtrfm import RTRFMIE +from .rts import RTSIE +from .rtvcplay import ( + RTVCPlayIE, + RTVCPlayEmbedIE, + RTVCKalturaIE, +) +from .rtve import ( + RTVEALaCartaIE, + RTVEAudioIE, + RTVELiveIE, + RTVEInfantilIE, + RTVETelevisionIE, +) +from .rtvs import RTVSIE +from .rtvslo import RTVSLOIE +from .rule34video import 
Rule34VideoIE +from .rumble import ( + RumbleEmbedIE, + RumbleIE, + RumbleChannelIE, +) +from .rudovideo import RudoVideoIE +from .rutube import ( + RutubeIE, + RutubeChannelIE, + RutubeEmbedIE, + RutubeMovieIE, + RutubePersonIE, + RutubePlaylistIE, + RutubeTagsIE, +) +from .glomex import ( + GlomexIE, + GlomexEmbedIE, +) +from .megatvcom import ( + MegaTVComIE, + MegaTVComEmbedIE, +) +from .antenna import ( + AntennaGrWatchIE, + Ant1NewsGrArticleIE, + Ant1NewsGrEmbedIE, +) +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .ruv import ( + RuvIE, + RuvSpilaIE +) +from .s4c import ( + S4CIE, + S4CSeriesIE +) +from .safari import ( + SafariIE, + SafariApiIE, + SafariCourseIE, +) +from .saitosan import SaitosanIE +from .samplefocus import SampleFocusIE +from .sapo import SapoIE +from .sbs import SBSIE +from .sbscokr import ( + SBSCoKrIE, + SBSCoKrAllvodProgramIE, + SBSCoKrProgramsVodIE, +) +from .screen9 import Screen9IE +from .screencast import ScreencastIE +from .screencastify import ScreencastifyIE +from .screencastomatic import ScreencastOMaticIE +from .scrippsnetworks import ( + ScrippsNetworksWatchIE, + ScrippsNetworksIE, +) +from .scte import ( + SCTEIE, + SCTECourseIE, +) +from .scrolller import ScrolllerIE +from .sejmpl import SejmIE +from .senalcolombia import SenalColombiaLiveIE +from .senategov import SenateISVPIE, SenateGovIE +from .sendtonews import SendtoNewsIE +from .servus import ServusIE +from .sevenplus import SevenPlusIE +from .sexu import SexuIE +from .seznamzpravy import ( + SeznamZpravyIE, + SeznamZpravyArticleIE, +) +from .shahid import ( + ShahidIE, + ShahidShowIE, +) +from .sharevideos import ShareVideosEmbedIE +from .sibnet import SibnetEmbedIE +from .shemaroome import ShemarooMeIE +from .showroomlive import ShowRoomLiveIE +from .simplecast import ( + SimplecastIE, + SimplecastEpisodeIE, + SimplecastPodcastIE, +) +from .sina import SinaIE +from .sixplay import SixPlayIE +from .skeb import SkebIE +from .skyit import ( + SkyItPlayerIE, + SkyItVideoIE, + SkyItVideoLiveIE, + SkyItIE, + SkyItArteIE, + CieloTVItIE, + TV8ItIE, +) +from .skylinewebcams import SkylineWebcamsIE +from .skynewsarabia import ( + SkyNewsArabiaIE, + SkyNewsArabiaArticleIE, +) +from .skynewsau import SkyNewsAUIE +from .sky import ( + SkyNewsIE, + SkyNewsStoryIE, + SkySportsIE, + SkySportsNewsIE, +) +from .slideshare import SlideshareIE +from .slideslive import SlidesLiveIE +from .slutload import SlutloadIE +from .smotrim import SmotrimIE +from .snotr import SnotrIE +from .sohu import ( + SohuIE, + SohuVIE, +) +from .sonyliv import ( + SonyLIVIE, + SonyLIVSeriesIE, +) +from .soundcloud import ( + SoundcloudEmbedIE, + SoundcloudIE, + SoundcloudSetIE, + SoundcloudRelatedIE, + SoundcloudUserIE, + SoundcloudUserPermalinkIE, + SoundcloudTrackStationIE, + SoundcloudPlaylistIE, + SoundcloudSearchIE, +) +from .soundgasm import ( + SoundgasmIE, + SoundgasmProfileIE +) +from .southpark import ( + SouthParkIE, + SouthParkDeIE, + SouthParkDkIE, + SouthParkEsIE, + SouthParkLatIE, + SouthParkNlIE +) +from .sovietscloset import ( + SovietsClosetIE, + SovietsClosetPlaylistIE +) +from .spankbang import ( + SpankBangIE, + SpankBangPlaylistIE, +) +from .spiegel import SpiegelIE +from .spike import ( + BellatorIE, + ParamountNetworkIE, +) +from .stageplus import StagePlusVODConcertIE +from .startrek import StarTrekIE +from .stitcher import ( + StitcherIE, + StitcherShowIE, +) +from .sport5 import Sport5IE +from .sportbox import SportBoxIE +from .sportdeutschland import SportDeutschlandIE +from .spotify import 
( + SpotifyIE, + SpotifyShowIE, +) +from .spreaker import ( + SpreakerIE, + SpreakerPageIE, + SpreakerShowIE, + SpreakerShowPageIE, +) +from .springboardplatform import SpringboardPlatformIE +from .sprout import SproutIE +from .srgssr import ( + SRGSSRIE, + SRGSSRPlayIE, +) +from .srmediathek import SRMediathekIE +from .stacommu import ( + StacommuLiveIE, + StacommuVODIE, + TheaterComplexTownVODIE, + TheaterComplexTownPPVIE, +) +from .stanfordoc import StanfordOpenClassroomIE +from .startv import StarTVIE +from .steam import ( + SteamIE, + SteamCommunityBroadcastIE, +) +from .storyfire import ( + StoryFireIE, + StoryFireUserIE, + StoryFireSeriesIE, +) +from .streamable import StreamableIE +from .streamcz import StreamCZIE +from .streetvoice import StreetVoiceIE +from .stretchinternet import StretchInternetIE +from .stripchat import StripchatIE +from .stv import STVPlayerIE +from .substack import SubstackIE +from .sunporno import SunPornoIE +from .sverigesradio import ( + SverigesRadioEpisodeIE, + SverigesRadioPublicationIE, +) +from .svt import ( + SVTIE, + SVTPageIE, + SVTPlayIE, + SVTSeriesIE, +) +from .swearnet import SwearnetEpisodeIE +from .syvdk import SYVDKIE +from .syfy import SyfyIE +from .sztvhu import SztvHuIE +from .tagesschau import TagesschauIE +from .tass import TassIE +from .tbs import TBSIE +from .tbsjp import ( + TBSJPEpisodeIE, + TBSJPProgramIE, + TBSJPPlaylistIE, +) +from .teachable import ( + TeachableIE, + TeachableCourseIE, +) +from .teachertube import ( + TeacherTubeIE, + TeacherTubeUserIE, +) +from .teachingchannel import TeachingChannelIE +from .teamcoco import ( + TeamcocoIE, + ConanClassicIE, +) +from .teamtreehouse import TeamTreeHouseIE +from .ted import ( + TedEmbedIE, + TedPlaylistIE, + TedSeriesIE, + TedTalkIE, +) +from .tele5 import Tele5IE +from .tele13 import Tele13IE +from .telebruxelles import TeleBruxellesIE +from .telecaribe import TelecaribePlayIE +from .telecinco import TelecincoIE +from .telegraaf import TelegraafIE +from .telegram import TelegramEmbedIE +from .telemb import TeleMBIE +from .telemundo import TelemundoIE +from .telequebec import ( + TeleQuebecIE, + TeleQuebecSquatIE, + TeleQuebecEmissionIE, + TeleQuebecLiveIE, + TeleQuebecVideoIE, +) +from .teletask import TeleTaskIE +from .telewebion import TelewebionIE +from .tempo import TempoIE, IVXPlayerIE +from .tencent import ( + IflixEpisodeIE, + IflixSeriesIE, + VQQSeriesIE, + VQQVideoIE, + WeTvEpisodeIE, + WeTvSeriesIE, +) +from .tennistv import TennisTVIE +from .tenplay import ( + TenPlayIE, + TenPlaySeasonIE, +) +from .testurl import TestURLIE +from .tf1 import TF1IE +from .tfo import TFOIE +from .theguardian import ( + TheGuardianPodcastIE, + TheGuardianPodcastPlaylistIE, +) +from .theholetv import TheHoleTvIE +from .theintercept import TheInterceptIE +from .theplatform import ( + ThePlatformIE, + ThePlatformFeedIE, +) +from .thestar import TheStarIE +from .thesun import TheSunIE +from .theweatherchannel import TheWeatherChannelIE +from .thisamericanlife import ThisAmericanLifeIE +from .thisoldhouse import ThisOldHouseIE +from .thisvid import ( + ThisVidIE, + ThisVidMemberIE, + ThisVidPlaylistIE, +) +from .threespeak import ( + ThreeSpeakIE, + ThreeSpeakUserIE, +) +from .threeqsdn import ThreeQSDNIE +from .tiktok import ( + TikTokIE, + TikTokUserIE, + TikTokSoundIE, + TikTokEffectIE, + TikTokTagIE, + TikTokVMIE, + TikTokLiveIE, + DouyinIE, +) +from .tmz import TMZIE +from .tnaflix import ( + TNAFlixNetworkEmbedIE, + TNAFlixIE, + EMPFlixIE, + MovieFapIE, +) +from .toggle import ( + 
ToggleIE, + MeWatchIE, +) +from .toggo import ( + ToggoIE, +) +from .tonline import TOnlineIE +from .toongoggles import ToonGogglesIE +from .toutv import TouTvIE +from .toypics import ToypicsUserIE, ToypicsIE +from .traileraddict import TrailerAddictIE +from .triller import ( + TrillerIE, + TrillerUserIE, + TrillerShortIE, +) +from .trovo import ( + TrovoIE, + TrovoVodIE, + TrovoChannelVodIE, + TrovoChannelClipIE, +) +from .trtcocuk import TrtCocukVideoIE +from .trtworld import TrtWorldIE +from .trueid import TrueIDIE +from .trunews import TruNewsIE +from .truth import TruthIE +from .trutv import TruTVIE +from .tube8 import Tube8IE +from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE +from .tubitv import ( + TubiTvIE, + TubiTvShowIE, +) +from .tumblr import TumblrIE +from .tunein import ( + TuneInStationIE, + TuneInPodcastIE, + TuneInPodcastEpisodeIE, + TuneInShortenerIE, +) +from .tv2 import ( + TV2IE, + TV2ArticleIE, + KatsomoIE, + MTVUutisetArticleIE, +) +from .tv24ua import ( + TV24UAVideoIE, +) +from .tv2dk import ( + TV2DKIE, + TV2DKBornholmPlayIE, +) +from .tv2hu import ( + TV2HuIE, + TV2HuSeriesIE, +) +from .tv4 import TV4IE +from .tv5mondeplus import TV5MondePlusIE +from .tv5unis import ( + TV5UnisVideoIE, + TV5UnisIE, +) +from .tva import ( + TVAIE, + QubIE, +) +from .tvanouvelles import ( + TVANouvellesIE, + TVANouvellesArticleIE, +) +from .tvc import ( + TVCIE, + TVCArticleIE, +) +from .tver import TVerIE +from .tvigle import TvigleIE +from .tviplayer import TVIPlayerIE +from .tvland import TVLandIE +from .tvn24 import TVN24IE +from .tvnoe import TVNoeIE +from .tvopengr import ( + TVOpenGrWatchIE, + TVOpenGrEmbedIE, +) +from .tvp import ( + TVPEmbedIE, + TVPIE, + TVPStreamIE, + TVPVODSeriesIE, + TVPVODVideoIE, +) +from .tvplay import ( + TVPlayIE, + TVPlayHomeIE, +) +from .tvplayer import TVPlayerIE +from .tweakers import TweakersIE +from .twentymin import TwentyMinutenIE +from .twentythreevideo import TwentyThreeVideoIE +from .twitcasting import ( + TwitCastingIE, + TwitCastingLiveIE, + TwitCastingUserIE, +) +from .twitch import ( + TwitchVodIE, + TwitchCollectionIE, + TwitchVideosIE, + TwitchVideosClipsIE, + TwitchVideosCollectionsIE, + TwitchStreamIE, + TwitchClipsIE, +) +from .twitter import ( + TwitterCardIE, + TwitterIE, + TwitterAmplifyIE, + TwitterBroadcastIE, + TwitterSpacesIE, + TwitterShortenerIE, +) +from .txxx import ( + TxxxIE, + PornTopIE, +) +from .udemy import ( + UdemyIE, + UdemyCourseIE +) +from .udn import UDNEmbedIE +from .ufctv import ( + UFCTVIE, + UFCArabiaIE, +) +from .ukcolumn import UkColumnIE +from .uktvplay import UKTVPlayIE +from .digiteka import DigitekaIE +from .dlive import ( + DLiveVODIE, + DLiveStreamIE, +) +from .drooble import DroobleIE +from .umg import UMGDeIE +from .unistra import UnistraIE +from .unity import UnityIE +from .unsupported import KnownDRMIE, KnownPiracyIE +from .uol import UOLIE +from .uplynk import ( + UplynkIE, + UplynkPreplayIE, +) +from .urort import UrortIE +from .urplay import URPlayIE +from .usanetwork import USANetworkIE +from .usatoday import USATodayIE +from .ustream import UstreamIE, UstreamChannelIE +from .ustudio import ( + UstudioIE, + UstudioEmbedIE, +) +from .utreon import UtreonIE +from .varzesh3 import Varzesh3IE +from .vbox7 import Vbox7IE +from .veo import VeoIE +from .veoh import ( + VeohIE, + VeohUserIE +) +from .vesti import VestiIE +from .vevo import ( + VevoIE, + VevoPlaylistIE, +) +from .vgtv import ( + BTArticleIE, + BTVestlendingenIE, + VGTVIE, +) +from .vh1 import VH1IE +from .vice import ( 
+ ViceIE, + ViceArticleIE, + ViceShowIE, +) +from .viddler import ViddlerIE +from .videa import VideaIE +from .videocampus_sachsen import ( + VideocampusSachsenIE, + ViMPPlaylistIE, +) +from .videodetective import VideoDetectiveIE +from .videofyme import VideofyMeIE +from .videoken import ( + VideoKenIE, + VideoKenPlayerIE, + VideoKenPlaylistIE, + VideoKenCategoryIE, + VideoKenTopicIE, +) +from .videomore import ( + VideomoreIE, + VideomoreVideoIE, + VideomoreSeasonIE, +) +from .videopress import VideoPressIE +from .vidio import ( + VidioIE, + VidioPremierIE, + VidioLiveIE +) +from .vidlii import VidLiiIE +from .vidly import VidlyIE +from .viewlift import ( + ViewLiftIE, + ViewLiftEmbedIE, +) +from .viidea import ViideaIE +from .vimeo import ( + VimeoIE, + VimeoAlbumIE, + VimeoChannelIE, + VimeoGroupsIE, + VimeoLikesIE, + VimeoOndemandIE, + VimeoProIE, + VimeoReviewIE, + VimeoUserIE, + VimeoWatchLaterIE, + VHXEmbedIE, +) +from .vimm import ( + VimmIE, + VimmRecordingIE, +) +from .vine import ( + VineIE, + VineUserIE, +) +from .viki import ( + VikiIE, + VikiChannelIE, +) +from .viously import ViouslyIE +from .viqeo import ViqeoIE +from .viu import ( + ViuIE, + ViuPlaylistIE, + ViuOTTIE, + ViuOTTIndonesiaIE, +) +from .vk import ( + VKIE, + VKUserVideosIE, + VKWallPostIE, + VKPlayIE, + VKPlayLiveIE, +) +from .vocaroo import VocarooIE +from .vodpl import VODPlIE +from .vodplatform import VODPlatformIE +from .voicy import ( + VoicyIE, + VoicyChannelIE, +) +from .volejtv import VolejTVIE +from .voot import ( + VootIE, + VootSeriesIE, +) +from .voxmedia import ( + VoxMediaVolumeIE, + VoxMediaIE, +) +from .vrt import ( + VRTIE, + VrtNUIE, + KetnetIE, + DagelijkseKostIE, +) +from .vtm import VTMIE +from .medialaan import MedialaanIE +from .vuclip import VuClipIE +from .vvvvid import ( + VVVVIDIE, + VVVVIDShowIE, +) +from .walla import WallaIE +from .washingtonpost import ( + WashingtonPostIE, + WashingtonPostArticleIE, +) +from .wat import WatIE +from .wdr import ( + WDRIE, + WDRPageIE, + WDRElefantIE, + WDRMobileIE, +) +from .webcamerapl import WebcameraplIE +from .webcaster import ( + WebcasterIE, + WebcasterFeedIE, +) +from .webofstories import ( + WebOfStoriesIE, + WebOfStoriesPlaylistIE, +) +from .weibo import ( + WeiboIE, + WeiboVideoIE, + WeiboUserIE, +) +from .weiqitv import WeiqiTVIE +from .weverse import ( + WeverseIE, + WeverseMediaIE, + WeverseMomentIE, + WeverseLiveTabIE, + WeverseMediaTabIE, + WeverseLiveIE, +) +from .wevidi import WeVidiIE +from .weyyak import WeyyakIE +from .whyp import WhypIE +from .wikimedia import WikimediaIE +from .wimbledon import WimbledonIE +from .wimtv import WimTVIE +from .whowatch import WhoWatchIE +from .wistia import ( + WistiaIE, + WistiaPlaylistIE, + WistiaChannelIE, +) +from .wordpress import ( + WordpressPlaylistEmbedIE, + WordpressMiniAudioPlayerEmbedIE, +) +from .worldstarhiphop import WorldStarHipHopIE +from .wppilot import ( + WPPilotIE, + WPPilotChannelsIE, +) +from .wrestleuniverse import ( + WrestleUniverseVODIE, + WrestleUniversePPVIE, +) +from .wsj import ( + WSJIE, + WSJArticleIE, +) +from .wwe import WWEIE +from .wykop import ( + WykopDigIE, + WykopDigCommentIE, + WykopPostIE, + WykopPostCommentIE, +) +from .xanimu import XanimuIE +from .xboxclips import XboxClipsIE +from .xfileshare import XFileShareIE +from .xhamster import ( + XHamsterIE, + XHamsterEmbedIE, + XHamsterUserIE, +) +from .ximalaya import ( + XimalayaIE, + XimalayaAlbumIE +) +from .xinpianchang import XinpianchangIE +from .xminus import XMinusIE +from .xnxx import XNXXIE 
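The ordering of this file is semantic, not cosmetic: per the `gen_extractor_classes()` docstring above, the first extractor whose pattern matches handles the URL, and hoisting the YouTube extractors to the top (the "moved to the top to improve performance" comment at the head of this file) means the most common URLs match without scanning the rest. A hedged sketch of that first-match rule (the real dispatch in `YoutubeDL` adds more bookkeeping):

    from yt_dlp.extractor import gen_extractor_classes

    def first_suitable_ie(url):
        # First match wins; GenericIE is ordered last as the catch-all
        for ie in gen_extractor_classes():
            if ie.suitable(url):
                return ie

    print(first_suitable_ie('https://www.youtube.com/watch?v=BaW_jenozKc').IE_NAME)  # 'youtube'
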
+from .xstream import XstreamIE +from .xvideos import ( + XVideosIE, + XVideosQuickiesIE +) +from .xxxymovies import XXXYMoviesIE +from .yahoo import ( + YahooIE, + YahooSearchIE, + YahooJapanNewsIE, +) +from .yandexdisk import YandexDiskIE +from .yandexmusic import ( + YandexMusicTrackIE, + YandexMusicAlbumIE, + YandexMusicPlaylistIE, + YandexMusicArtistTracksIE, + YandexMusicArtistAlbumsIE, +) +from .yandexvideo import ( + YandexVideoIE, + YandexVideoPreviewIE, + ZenYandexIE, + ZenYandexChannelIE, +) +from .yapfiles import YapFilesIE +from .yappy import ( + YappyIE, + YappyProfileIE, +) +from .yle_areena import YleAreenaIE +from .youjizz import YouJizzIE +from .youku import ( + YoukuIE, + YoukuShowIE, +) +from .younow import ( + YouNowLiveIE, + YouNowChannelIE, + YouNowMomentIE, +) +from .youporn import YouPornIE +from .yourporn import YourPornIE +from .yourupload import YourUploadIE +from .zaiko import ( + ZaikoIE, + ZaikoETicketIE, +) +from .zapiks import ZapiksIE +from .zattoo import ( + BBVTVIE, + BBVTVLiveIE, + BBVTVRecordingsIE, + EinsUndEinsTVIE, + EinsUndEinsTVLiveIE, + EinsUndEinsTVRecordingsIE, + EWETVIE, + EWETVLiveIE, + EWETVRecordingsIE, + GlattvisionTVIE, + GlattvisionTVLiveIE, + GlattvisionTVRecordingsIE, + MNetTVIE, + MNetTVLiveIE, + MNetTVRecordingsIE, + NetPlusTVIE, + NetPlusTVLiveIE, + NetPlusTVRecordingsIE, + OsnatelTVIE, + OsnatelTVLiveIE, + OsnatelTVRecordingsIE, + QuantumTVIE, + QuantumTVLiveIE, + QuantumTVRecordingsIE, + SaltTVIE, + SaltTVLiveIE, + SaltTVRecordingsIE, + SAKTVIE, + SAKTVLiveIE, + SAKTVRecordingsIE, + VTXTVIE, + VTXTVLiveIE, + VTXTVRecordingsIE, + WalyTVIE, + WalyTVLiveIE, + WalyTVRecordingsIE, + ZattooIE, + ZattooLiveIE, + ZattooMoviesIE, + ZattooRecordingsIE, +) +from .zdf import ZDFIE, ZDFChannelIE +from .zee5 import ( + Zee5IE, + Zee5SeriesIE, +) +from .zeenews import ZeeNewsIE +from .zenporn import ZenPornIE +from .zetland import ZetlandDKArticleIE +from .zhihu import ZhihuIE +from .zingmp3 import ( + ZingMp3IE, + ZingMp3AlbumIE, + ZingMp3ChartHomeIE, + ZingMp3WeekChartIE, + ZingMp3ChartMusicVideoIE, + ZingMp3UserIE, + ZingMp3HubIE, + ZingMp3LiveRadioIE, + ZingMp3PodcastEpisodeIE, + ZingMp3PodcastIE, +) +from .zoom import ZoomIE +from .zype import ZypeIE diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py new file mode 100644 index 0000000..b217422 --- /dev/null +++ b/yt_dlp/extractor/abc.py @@ -0,0 +1,421 @@ +import hashlib +import hmac +import re +import time + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + dict_get, + ExtractorError, + js_to_json, + int_or_none, + parse_iso8601, + str_or_none, + traverse_obj, + try_get, + unescapeHTML, + update_url_query, + url_or_none, +) + + +class ABCIE(InfoExtractor): + IE_NAME = 'abc.net.au' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P\d{5,})' + + _TESTS = [{ + 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', + 'md5': 'cb3dd03b18455a661071ee1e28344d9f', + 'info_dict': { + 'id': '5868334', + 'ext': 'mp4', + 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone', + 'description': 'md5:809ad29c67a05f54eb41f2a105693a67', + }, + 'skip': 'this video has expired', + }, { + 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326', + 'md5': '4ebd61bdc82d9a8b722f64f1f4b4d121', + 'info_dict': { + 'id': 'NvqvPeNZsHU', + 'ext': 'mp4', + 'upload_date': '20150816', + 'uploader': 'ABC News 
(Australia)', + 'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef', + 'uploader_id': 'NewsOnABC', + 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill', + }, + 'add_ie': ['Youtube'], + 'skip': 'Not accessible from Travis CI server', + }, { + 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080', + 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f', + 'info_dict': { + 'id': '6880080', + 'ext': 'mp3', + 'title': 'NAB lifts interest rates, following Westpac and CBA', + 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728', + }, + }, { + 'url': 'http://www.abc.net.au/news/2015-10-19/6866214', + 'only_matching': True, + }, { + 'url': 'https://www.abc.net.au/btn/classroom/wwi-centenary/10527914', + 'info_dict': { + 'id': '10527914', + 'ext': 'mp4', + 'title': 'WWI Centenary', + 'description': 'md5:c2379ec0ca84072e86b446e536954546', + } + }, { + 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', + 'info_dict': { + 'id': '12342074', + 'ext': 'mp4', + 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', + 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', + } + }, { + 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', + 'info_dict': { + 'id': 'tDL8Ld4dK_8', + 'ext': 'mp4', + 'title': 'Fortnite Banned From Apple and Google App Stores', + 'description': 'md5:a6df3f36ce8f816b74af4bd6462f5651', + 'upload_date': '20200813', + 'uploader': 'Behind the News', + 'uploader_id': 'behindthenews', + } + }, { + 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', + 'info_dict': { + 'id': '102520540', + 'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus', + 'ext': 'mp4', + 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', + 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + mobj = re.search(r'[^"]+)"\s+data-duration="\d+"\s+title="Download audio directly">', webpage) + if mobj: + urls_info = mobj.groupdict() + youtube = False + video = False + else: + mobj = re.search(r'External Link:', + webpage) + if mobj is None: + mobj = re.search(r'' + + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root', default=None) + if xml_root is None: + # Probably need to authenticate + login_res = self._login(webpage_url, display_id) + if login_res is None: + self.report_warning('Could not login.') + else: + start_page = login_res + # Grab the url from the authenticated page + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root') + + xml_name = self._html_search_regex( + r'', webpage): + url = self._search_regex( + r'src=(["\'])(?P.+?partnerplayer.+?)\1', iframe, + 'player URL', default=None, group='url') + if url: + break + + if not url: + url = self._og_search_url(webpage) + + mobj = re.match( + self._VALID_URL, 
self._proto_relative_url(url.strip())) + + player_id = mobj.group('player_id') + if not display_id: + display_id = player_id + if player_id: + player_page = self._download_webpage( + url, display_id, note='Downloading player page', + errnote='Could not download player page') + video_id = self._search_regex( + r'\d+)' + _TESTS = [ + { + 'url': 'https://pbskids.org/video/molly-of-denali/3030407927', + 'md5': '1ded20a017cc6b53446238f1804ce4c7', + 'info_dict': { + 'id': '3030407927', + 'title': 'Bird in the Hand/Bye-Bye Birdie', + 'channel': 'molly-of-denali', + 'duration': 1540, + 'ext': 'mp4', + 'series': 'Molly of Denali', + 'description': 'md5:d006b2211633685d8ebc8d03b6d5611e', + 'categories': ['Episode'], + 'upload_date': '20190718', + } + }, + { + 'url': 'https://pbskids.org/video/plum-landing/2365205059', + 'md5': '92e5d189851a64ae1d0237a965be71f5', + 'info_dict': { + 'id': '2365205059', + 'title': 'Cooper\'s Favorite Place in Nature', + 'channel': 'plum-landing', + 'duration': 67, + 'ext': 'mp4', + 'series': 'Plum Landing', + 'description': 'md5:657e5fc4356a84ead1c061eb280ff05d', + 'categories': ['Episode'], + 'upload_date': '20140302', + } + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + meta = self._search_json(r'window\._PBS_KIDS_DEEPLINK\s*=', webpage, 'video info', video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + traverse_obj(meta, ('video_obj', 'URI', {url_or_none})), video_id, ext='mp4') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(meta, { + 'categories': ('video_obj', 'video_type', {str}, {lambda x: [x] if x else None}), + 'channel': ('show_slug', {str}), + 'description': ('video_obj', 'description', {str}), + 'duration': ('video_obj', 'duration', {int_or_none}), + 'series': ('video_obj', 'program_title', {str}), + 'title': ('video_obj', 'title', {str}), + 'upload_date': ('video_obj', 'air_date', {unified_strdate}), + }) + } diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py new file mode 100644 index 0000000..e27e5a7 --- /dev/null +++ b/yt_dlp/extractor/pearvideo.py @@ -0,0 +1,68 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + qualities, + unified_timestamp, + traverse_obj, +) + + +class PearVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P\d+)' + _TEST = { + 'url': 'http://www.pearvideo.com/video_1076290', + 'info_dict': { + 'id': '1076290', + 'ext': 'mp4', + 'title': '小浣熊在主人家玻璃上滚石头:没砸', + 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', + 'timestamp': 1494275280, + 'upload_date': '20170508', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + quality = qualities( + ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src')) + + formats = [{ + 'url': mobj.group('url'), + 'format_id': mobj.group('id'), + 'quality': quality(mobj.group('id')), + } for mobj in re.finditer( + r'(?P[a-zA-Z]+)Url\s*=\s*(["\'])(?P(?:https?:)?//.+?)\2', + webpage)] + if not formats: + info = self._download_json( + 'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id, + query={'contId': video_id}, headers={'Referer': url}) + formats = [{ + 'format_id': k, + 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v + } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v] + + title = self._search_regex( + 
(r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)', + r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'), + webpage, 'title', group='value') + description = self._search_regex( + (r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)', + r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'), + webpage, 'description', default=None, + group='value') or self._html_search_meta('Description', webpage) + timestamp = unified_timestamp(self._search_regex( + r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)', + webpage, 'timestamp', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py new file mode 100644 index 0000000..939c26d --- /dev/null +++ b/yt_dlp/extractor/peekvids.py @@ -0,0 +1,188 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + get_element_by_class, + int_or_none, + merge_dicts, + url_or_none, +) + + +class PeekVidsBaseIE(InfoExtractor): + def _real_extract(self, url): + domain, video_id = self._match_valid_url(url).group('domain', 'id') + webpage = self._download_webpage(url, video_id, expected_status=429) + if '>Rate Limit Exceeded' in webpage: + raise ExtractorError( + f'You are suspected as a bot. Wait, or pass the captcha on the site and provide cookies. {self._login_hint()}', + video_id=video_id, expected=True) + + title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title') + + display_id = video_id + video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID') + srcs = self._download_json( + f'https://www.{domain}/v-alt/{video_id}', video_id, + note='Downloading list of source files') + + formats = [] + for k, v in srcs.items(): + f_url = url_or_none(v) + if not f_url: + continue + + height = self._search_regex(r'^data-src(\d{3,})$', k, 'height', default=None) + if not height: + continue + + formats.append({ + 'url': f_url, + 'format_id': height, + 'height': int_or_none(height), + }) + + if not formats: + formats = [{'url': url} for url in srcs.values()] + + info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) + info.pop('url', None) + + # may not have found the thumbnail if it was in a list in the ld+json + info.setdefault('thumbnail', self._og_search_thumbnail(webpage)) + detail = (get_element_by_class('detail-video-block', webpage) + or get_element_by_class('detail-block', webpage) or '') + info['description'] = self._html_search_regex( + rf'(?s)(.+?)(?:{re.escape(info.get("description", ""))}\s*<|<ul\b)', + detail, 'description', default=None) + + def cat_tags(name, html): + l = self._html_search_regex( + rf'(?s)<span\b[^>]*>\s*{re.escape(name)}\s*:\s*</span>(.+?)</li>', + html, name, default='') + return list(filter(None, re.split(r'\s+', l))) + + return merge_dicts({ + 'id': video_id, + 'display_id': display_id, + 'age_limit': 18, + 'formats': formats, + 'categories': cat_tags('Categories', detail), + 'tags': cat_tags('Tags', detail), + 'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None), + }, info) + + +class PeekVidsIE(PeekVidsBaseIE): + _VALID_URL = r'''(?x) + https?://(?:www\.)?(?P<domain>peekvids\.com)/ + (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) + (?P<id>[^/?&#]*) + ''' + _TESTS = [{ + 'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', + 'md5': '2ff6a357a9717dc9dc9894b51307e9a2', + 'info_dict': { + 'id': '1262717', + 'display_id': 'BSyLMbN0YCd', + 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', + 'ext': 'mp4', + 'thumbnail':
r're:^https?://.*\.jpg$', + 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', + 'timestamp': 1642579329, + 'upload_date': '20220119', + 'duration': 416, + 'view_count': int, + 'age_limit': 18, + 'uploader': 'SEXYhub.com', + 'categories': list, + 'tags': list, + }, + }] + + +class PlayVidsIE(PeekVidsBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?P<domain>playvids\.com)/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)' + _TESTS = [{ + 'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', + 'md5': '2f12e50213dd65f142175da633c4564c', + 'info_dict': { + 'id': '1978030', + 'display_id': 'U3pBrYhsjXM', + 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', + 'timestamp': 1640435839, + 'upload_date': '20211225', + 'duration': 416, + 'view_count': int, + 'age_limit': 18, + 'uploader': 'SEXYhub.com', + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', + 'only_matching': True, + }, { + 'url': 'https://www.playvids.com/embed/U3pBrYhsjXM', + 'only_matching': True, + }, { + 'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line', + 'md5': 'e783986e596cafbf46411a174ab42ba6', + 'info_dict': { + 'id': '762385', + 'display_id': 'bKmGLe3IwjZ', + 'ext': 'mp4', + 'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6', + 'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef', + 'timestamp': 1516958544, + 'upload_date': '20180126', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 480, + 'uploader': 'Brazzers', + 'age_limit': 18, + 'view_count': int, + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'https://www.playvids.com/v/47iUho33toY', + 'md5': 'b056b5049d34b648c1e86497cf4febce', + 'info_dict': { + 'id': '700621', + 'display_id': '47iUho33toY', + 'ext': 'mp4', + 'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE', + 'timestamp': 1507052209, + 'upload_date': '20171003', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 332, + 'uploader': 'Cacerenele', + 'age_limit': 18, + 'view_count': int, + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances', + 'md5': 'efa09be9f031314b7b7e3bc6510cd0df', + 'info_dict': { + 'id': '1523518', + 'display_id': 'z3_7iwWCmqt', + 'ext': 'mp4', + 'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances', + 'timestamp': 1607470323, + 'upload_date': '20201208', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 593, + 'uploader': 'yorours', + 'age_limit': 18, + 'view_count': int, + 'categories': list, + 'tags': list, + }, + }] diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py new file mode 100644 index 0000000..730b239 --- /dev/null +++ b/yt_dlp/extractor/peertube.py @@ -0,0 +1,1647 @@ +import functools +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + format_field, + int_or_none, + parse_resolution, + str_or_none, + try_get, + unified_timestamp, + url_or_none, + urljoin, + OnDemandPagedList, +) + + +class PeerTubeIE(InfoExtractor): + _INSTANCES_RE = r'''(?: + # Taken from https://instances.joinpeertube.org/instances + 0ch\.tv| + 3dctube\.3dcandy\.social| + all\.electric\.kitchen| + alterscope\.fr| +
anarchy\.tube| + apathy\.tv| + apertatube\.net| + archive\.nocopyrightintended\.tv| + archive\.reclaim\.tv| + area51\.media| + astrotube-ufe\.obspm\.fr| + astrotube\.obspm\.fr| + audio\.freediverse\.com| + azxtube\.youssefc\.tn| + bark\.video| + battlepenguin\.video| + bava\.tv| + bee-tube\.fr| + beetoons\.tv| + biblion\.refchat\.net| + biblioteca\.theowlclub\.net| + bideoak\.argia\.eus| + bideoteka\.eus| + birdtu\.be| + bitcointv\.com| + bonn\.video| + breeze\.tube| + brioco\.live| + brocosoup\.fr| + canal\.facil\.services| + canard\.tube| + cdn01\.tilvids\.com| + celluloid-media\.huma-num\.fr| + chicago1\.peertube\.support| + cliptube\.org| + cloudtube\.ise\.fraunhofer\.de| + comf\.tube| + comics\.peertube\.biz| + commons\.tube| + communitymedia\.video| + conspiracydistillery\.com| + crank\.recoil\.org| + dalek\.zone| + dalliance\.network| + dangly\.parts| + darkvapor\.nohost\.me| + daschauher\.aksel\.rocks| + digitalcourage\.video| + displayeurope\.video| + ds106\.tv| + dud-video\.inf\.tu-dresden\.de| + dud175\.inf\.tu-dresden\.de| + dytube\.com| + ebildungslabor\.video| + evangelisch\.video| + fair\.tube| + fedi\.video| + fedimovie\.com| + fediverse\.tv| + film\.k-prod\.fr| + flipboard\.video| + foss\.video| + fossfarmers\.company| + fotogramas\.politicaconciencia\.org| + freediverse\.com| + freesoto-u2151\.vm\.elestio\.app| + freesoto\.tv| + garr\.tv| + greatview\.video| + grypstube\.uni-greifswald\.de| + habratube\.site| + ilbjach\.ru| + infothema\.net| + itvplus\.iiens\.net| + johnydeep\.net| + juggling\.digital| + jupiter\.tube| + kadras\.live| + kino\.kompot\.si| + kino\.schuerz\.at| + kinowolnosc\.pl| + kirche\.peertube-host\.de| + kiwi\.froggirl\.club| + kodcast\.com| + kolektiva\.media| + kpop\.22x22\.ru| + kumi\.tube| + la2\.peertube\.support| + la3\.peertube\.support| + la4\.peertube\.support| + lastbreach\.tv| + lawsplaining\.peertube\.biz| + leopard\.tube| + live\.codinglab\.ch| + live\.libratoi\.org| + live\.oldskool\.fi| + live\.solari\.com| + lucarne\.balsamine\.be| + luxtube\.lu| + makertube\.net| + media\.econoalchemist\.com| + media\.exo\.cat| + media\.fsfe\.org| + media\.gzevd\.de| + media\.interior\.edu\.uy| + media\.krashboyz\.org| + media\.mzhd\.de| + media\.smz-ma\.de| + media\.theplattform\.net| + media\.undeadnetwork\.de| + medias\.debrouillonet\.org| + medias\.pingbase\.net| + mediatube\.fermalo\.fr| + melsungen\.peertube-host\.de| + merci-la-police\.fr| + mindlyvideos\.com| + mirror\.peertube\.metalbanana\.net| + mirrored\.rocks| + mix\.video| + mountaintown\.video| + movies\.metricsmaster\.eu| + mtube\.mooo\.com| + mytube\.kn-cloud\.de| + mytube\.le5emeaxe\.fr| + mytube\.madzel\.de| + nadajemy\.com| + nanawel-peertube\.dyndns\.org| + neat\.tube| + nethack\.tv| + nicecrew\.tv| + nightshift\.minnix\.dev| + nolog\.media| + nyltube\.nylarea\.com| + ocfedtest\.hosted\.spacebear\.ee| + openmedia\.edunova\.it| + p2ptv\.ru| + p\.eertu\.be| + p\.lu| + pastafriday\.club| + patriottube\.sonsofliberty\.red| + pcbu\.nl| + peer\.azurs\.fr| + peer\.d0g4\.me| + peer\.lukeog\.com| + peer\.madiator\.cloud| + peer\.raise-uav\.com| + peershare\.togart\.de| + peertube-blablalinux\.be| + peertube-demo\.learning-hub\.fr| + peertube-docker\.cpy\.re| + peertube-eu\.howlround\.com| + peertube-u5014\.vm\.elestio\.app| + peertube-us\.howlround\.com| + peertube\.020\.pl| + peertube\.0x5e\.eu| + peertube\.1984\.cz| + peertube\.2i2l\.net| + peertube\.adjutor\.xyz| + peertube\.adresse\.data\.gouv\.fr| + peertube\.alpharius\.io| + peertube\.am-networks\.fr| + peertube\.anduin\.net| + 
peertube\.anti-logic\.com| + peertube\.arch-linux\.cz| + peertube\.art3mis\.de| + peertube\.artsrn\.ualberta\.ca| + peertube\.askan\.info| + peertube\.astral0pitek\.synology\.me| + peertube\.atsuchan\.page| + peertube\.automat\.click| + peertube\.b38\.rural-it\.org| + peertube\.be| + peertube\.beeldengeluid\.nl| + peertube\.bgzashtita\.es| + peertube\.bike| + peertube\.bildung-ekhn\.de| + peertube\.biz| + peertube\.br0\.fr| + peertube\.bridaahost\.ynh\.fr| + peertube\.bubbletea\.dev| + peertube\.bubuit\.net| + peertube\.cabaal\.net| + peertube\.chatinbit\.com| + peertube\.chaunchy\.com| + peertube\.chir\.rs| + peertube\.christianpacaud\.com| + peertube\.chtisurel\.net| + peertube\.chuggybumba\.com| + peertube\.cipherbliss\.com| + peertube\.cirkau\.art| + peertube\.cloud\.nerdraum\.de| + peertube\.cloud\.sans\.pub| + peertube\.coko\.foundation| + peertube\.communecter\.org| + peertube\.concordia\.social| + peertube\.corrigan\.xyz| + peertube\.cpge-brizeux\.fr| + peertube\.ctseuro\.com| + peertube\.cuatrolibertades\.org| + peertube\.cube4fun\.net| + peertube\.dair-institute\.org| + peertube\.davigge\.com| + peertube\.dc\.pini\.fr| + peertube\.deadtom\.me| + peertube\.debian\.social| + peertube\.delta0189\.xyz| + peertube\.demonix\.fr| + peertube\.designersethiques\.org| + peertube\.desmu\.fr| + peertube\.devol\.it| + peertube\.dk| + peertube\.doesstuff\.social| + peertube\.eb8\.org| + peertube\.education-forum\.com| + peertube\.elforcer\.ru| + peertube\.em\.id\.lv| + peertube\.ethibox\.fr| + peertube\.eu\.org| + peertube\.european-pirates\.eu| + peertube\.eus| + peertube\.euskarabildua\.eus| + peertube\.expi\.studio| + peertube\.familie-berner\.de| + peertube\.familleboisteau\.fr| + peertube\.fedihost\.website| + peertube\.fenarinarsa\.com| + peertube\.festnoz\.de| + peertube\.forteza\.fr| + peertube\.freestorm\.online| + peertube\.functional\.cafe| + peertube\.gaminglinux\.fr| + peertube\.gargantia\.fr| + peertube\.geekgalaxy\.fr| + peertube\.gemlog\.ca| + peertube\.genma\.fr| + peertube\.get-racing\.de| + peertube\.ghis94\.ovh| + peertube\.gidikroon\.eu| + peertube\.giftedmc\.com| + peertube\.grosist\.fr| + peertube\.gruntwerk\.org| + peertube\.gsugambit\.com| + peertube\.hackerfoo\.com| + peertube\.hellsite\.net| + peertube\.helvetet\.eu| + peertube\.histoirescrepues\.fr| + peertube\.home\.x0r\.fr| + peertube\.hyperfreedom\.org| + peertube\.ichigo\.everydayimshuflin\.com| + peertube\.ifwo\.eu| + peertube\.in\.ua| + peertube\.inapurna\.org| + peertube\.informaction\.info| + peertube\.interhop\.org| + peertube\.it| + peertube\.it-arts\.net| + peertube\.jensdiemer\.de| + peertube\.johntheserg\.al| + peertube\.kaleidos\.net| + peertube\.kalua\.im| + peertube\.kcore\.org| + peertube\.keazilla\.net| + peertube\.klaewyss\.fr| + peertube\.kleph\.eu| + peertube\.kodein\.be| + peertube\.kooperatywa\.tech| + peertube\.kriom\.net| + peertube\.kx\.studio| + peertube\.kyriog\.eu| + peertube\.la-famille-muller\.fr| + peertube\.labeuropereunion\.eu| + peertube\.lagvoid\.com| + peertube\.lhc\.net\.br| + peertube\.libresolutions\.network| + peertube\.libretic\.fr| + peertube\.librosphere\.fr| + peertube\.logilab\.fr| + peertube\.lon\.tv| + peertube\.louisematic\.site| + peertube\.luckow\.org| + peertube\.luga\.at| + peertube\.lyceeconnecte\.fr| + peertube\.madixam\.xyz| + peertube\.magicstone\.dev| + peertube\.marienschule\.de| + peertube\.marud\.fr| + peertube\.maxweiss\.io| + peertube\.miguelcr\.me| + peertube\.mikemestnik\.net| + peertube\.mobilsicher\.de| + peertube\.monlycee\.net| + 
peertube\.mxinfo\.fr| + peertube\.naln1\.ca| + peertube\.netzbegruenung\.de| + peertube\.nicolastissot\.fr| + peertube\.nogafam\.fr| + peertube\.normalgamingcommunity\.cz| + peertube\.nz| + peertube\.offerman\.com| + peertube\.ohioskates\.com| + peertube\.onionstorm\.net| + peertube\.opencloud\.lu| + peertube\.otakufarms\.com| + peertube\.paladyn\.org| + peertube\.pix-n-chill\.fr| + peertube\.r2\.enst\.fr| + peertube\.r5c3\.fr| + peertube\.redpill-insight\.com| + peertube\.researchinstitute\.at| + peertube\.revelin\.fr| + peertube\.rlp\.schule| + peertube\.rokugan\.fr| + peertube\.rougevertbleu\.tv| + peertube\.roundpond\.net| + peertube\.rural-it\.org| + peertube\.satoshishop\.de| + peertube\.scyldings\.com| + peertube\.securitymadein\.lu| + peertube\.semperpax\.com| + peertube\.semweb\.pro| + peertube\.sensin\.eu| + peertube\.sidh\.bzh| + peertube\.skorpil\.cz| + peertube\.smertrios\.com| + peertube\.sqweeb\.net| + peertube\.stattzeitung\.org| + peertube\.stream| + peertube\.su| + peertube\.swrs\.net| + peertube\.takeko\.cyou| + peertube\.taxinachtegel\.de| + peertube\.teftera\.com| + peertube\.teutronic-services\.de| + peertube\.ti-fr\.com| + peertube\.tiennot\.net| + peertube\.tmp\.rcp\.tf| + peertube\.tspu\.edu\.ru| + peertube\.tv| + peertube\.tweb\.tv| + peertube\.underworld\.fr| + peertube\.vapronva\.pw| + peertube\.veen\.world| + peertube\.vesdia\.eu| + peertube\.virtual-assembly\.org| + peertube\.viviers-fibre\.net| + peertube\.vlaki\.cz| + peertube\.wiesbaden\.social| + peertube\.wivodaim\.net| + peertube\.wtf| + peertube\.wtfayla\.net| + peertube\.xrcb\.cat| + peertube\.xwiki\.com| + peertube\.zd\.do| + peertube\.zetamc\.net| + peertube\.zmuuf\.org| + peertube\.zoz-serv\.org| + peertube\.zwindler\.fr| + peervideo\.ru| + periscope\.numenaute\.org| + pete\.warpnine\.de| + petitlutinartube\.fr| + phijkchu\.com| + phoenixproject\.group| + piraten\.space| + pirtube\.calut\.fr| + pityu\.flaki\.hu| + play\.mittdata\.se| + player\.ojamajo\.moe| + podlibre\.video| + portal\.digilab\.nfa\.cz| + private\.fedimovie\.com| + pt01\.lehrerfortbildung-bw\.de| + pt\.diaspodon\.fr| + pt\.freedomwolf\.cc| + pt\.gordons\.gen\.nz| + pt\.ilyamikcoder\.com| + pt\.irnok\.net| + pt\.mezzo\.moe| + pt\.na4\.eu| + pt\.netcraft\.ch| + pt\.rwx\.ch| + pt\.sfunk1x\.com| + pt\.thishorsie\.rocks| + pt\.vern\.cc| + ptb\.lunarviews\.net| + ptube\.de| + ptube\.ranranhome\.info| + puffy\.tube| + puppet\.zone| + qtube\.qlyoung\.net| + quantube\.win| + rankett\.net| + replay\.jres\.org| + review\.peertube\.biz| + sdmtube\.fr| + secure\.direct-live\.net| + secure\.scanovid\.com| + seka\.pona\.la| + serv3\.wiki-tube\.de| + skeptube\.fr| + social\.fedimovie\.com| + socpeertube\.ru| + sovran\.video| + special\.videovortex\.tv| + spectra\.video| + stl1988\.peertube-host\.de| + stream\.biovisata\.lt| + stream\.conesphere\.cloud| + stream\.elven\.pw| + stream\.jurnalfm\.md| + stream\.k-prod\.fr| + stream\.litera\.tools| + stream\.nuemedia\.se| + stream\.rlp-media\.de| + stream\.vrse\.be| + studios\.racer159\.com| + styxhexenhammer666\.com| + syrteplay\.obspm\.fr| + t\.0x0\.st| + tbh\.co-shaoghal\.net| + test-fab\.ynh\.fr| + testube\.distrilab\.fr| + tgi\.hosted\.spacebear\.ee| + theater\.ethernia\.net| + thecool\.tube| + thevideoverse\.com| + tilvids\.com| + tinkerbetter\.tube| + tinsley\.video| + trailers\.ddigest\.com| + tube-action-educative\.apps\.education\.fr| + tube-arts-lettres-sciences-humaines\.apps\.education\.fr| + tube-cycle-2\.apps\.education\.fr| + tube-cycle-3\.apps\.education\.fr| + 
tube-education-physique-et-sportive\.apps\.education\.fr| + tube-enseignement-professionnel\.apps\.education\.fr| + tube-institutionnel\.apps\.education\.fr| + tube-langues-vivantes\.apps\.education\.fr| + tube-maternelle\.apps\.education\.fr| + tube-numerique-educatif\.apps\.education\.fr| + tube-sciences-technologies\.apps\.education\.fr| + tube-test\.apps\.education\.fr| + tube1\.perron-service\.de| + tube\.9minuti\.it| + tube\.abolivier\.bzh| + tube\.alado\.space| + tube\.amic37\.fr| + tube\.area404\.cloud| + tube\.arthack\.nz| + tube\.asulia\.fr| + tube\.awkward\.company| + tube\.azbyka\.ru| + tube\.azkware\.net| + tube\.bartrip\.me\.uk| + tube\.belowtoxic\.media| + tube\.bingle\.plus| + tube\.bit-friends\.de| + tube\.bstly\.de| + tube\.chosto\.me| + tube\.cms\.garden| + tube\.communia\.org| + tube\.cyberia\.club| + tube\.cybershock\.life| + tube\.dembased\.xyz| + tube\.dev\.displ\.eu| + tube\.digitalesozialearbeit\.de| + tube\.distrilab\.fr| + tube\.doortofreedom\.org| + tube\.dsocialize\.net| + tube\.e-jeremy\.com| + tube\.ebin\.club| + tube\.elemac\.fr| + tube\.erzbistum-hamburg\.de| + tube\.exozy\.me| + tube\.fdn\.fr| + tube\.fedi\.quebec| + tube\.fediverse\.at| + tube\.felinn\.org| + tube\.flokinet\.is| + tube\.foad\.me\.uk| + tube\.freepeople\.fr| + tube\.friloux\.me| + tube\.froth\.zone| + tube\.fulda\.social| + tube\.futuretic\.fr| + tube\.g1zm0\.de| + tube\.g4rf\.net| + tube\.gaiac\.io| + tube\.geekyboo\.net| + tube\.genb\.de| + tube\.ghk-academy\.info| + tube\.gi-it\.de| + tube\.grap\.coop| + tube\.graz\.social| + tube\.grin\.hu| + tube\.hokai\.lol| + tube\.int5\.net| + tube\.interhacker\.space| + tube\.invisible\.ch| + tube\.io18\.top| + tube\.itsg\.host| + tube\.jeena\.net| + tube\.kh-berlin\.de| + tube\.kockatoo\.org| + tube\.kotur\.org| + tube\.koweb\.fr| + tube\.la-dina\.net| + tube\.lab\.nrw| + tube\.lacaveatonton\.ovh| + tube\.laurent-malys\.fr| + tube\.leetdreams\.ch| + tube\.linkse\.media| + tube\.lokad\.com| + tube\.lucie-philou\.com| + tube\.media-techport\.de| + tube\.morozoff\.pro| + tube\.neshweb\.net| + tube\.nestor\.coop| + tube\.network\.europa\.eu| + tube\.nicfab\.eu| + tube\.nieuwwestbrabant\.nl| + tube\.nogafa\.org| + tube\.novg\.net| + tube\.nox-rhea\.org| + tube\.nuagelibre\.fr| + tube\.numerique\.gouv\.fr| + tube\.nuxnik\.com| + tube\.nx12\.net| + tube\.octaplex\.net| + tube\.oisux\.org| + tube\.okcinfo\.news| + tube\.onlinekirche\.net| + tube\.opportunis\.me| + tube\.oraclefilms\.com| + tube\.org\.il| + tube\.pacapime\.ovh| + tube\.parinux\.org| + tube\.pastwind\.top| + tube\.picasoft\.net| + tube\.pilgerweg-21\.de| + tube\.pmj\.rocks| + tube\.pol\.social| + tube\.ponsonaille\.fr| + tube\.portes-imaginaire\.org| + tube\.public\.apolut\.net| + tube\.pustule\.org| + tube\.pyngu\.com| + tube\.querdenken-711\.de| + tube\.rebellion\.global| + tube\.reseau-canope\.fr| + tube\.rhythms-of-resistance\.org| + tube\.risedsky\.ovh| + tube\.rooty\.fr| + tube\.rsi\.cnr\.it| + tube\.ryne\.moe| + tube\.schleuss\.online| + tube\.schule\.social| + tube\.sekretaerbaer\.net| + tube\.shanti\.cafe| + tube\.shela\.nu| + tube\.skrep\.in| + tube\.sleeping\.town| + tube\.sp-codes\.de| + tube\.spdns\.org| + tube\.systerserver\.net| + tube\.systest\.eu| + tube\.tappret\.fr| + tube\.techeasy\.org| + tube\.thierrytalbert\.fr| + tube\.tinfoil-hat\.net| + tube\.toldi\.eu| + tube\.tpshd\.de| + tube\.trax\.im| + tube\.troopers\.agency| + tube\.ttk\.is| + tube\.tuxfriend\.fr| + tube\.tylerdavis\.xyz| + tube\.ullihome\.de| + tube\.ulne\.be| + tube\.undernet\.uy| + tube\.vrpnet\.org| + 
tube\.wolfe\.casa| + tube\.xd0\.de| + tube\.xn--baw-joa\.social| + tube\.xy-space\.de| + tube\.yapbreak\.fr| + tubedu\.org| + tubulus\.openlatin\.org| + turtleisland\.video| + tututu\.tube| + tv\.adast\.dk| + tv\.adn\.life| + tv\.arns\.lt| + tv\.atmx\.ca| + tv\.based\.quest| + tv\.farewellutopia\.com| + tv\.filmfreedom\.net| + tv\.gravitons\.org| + tv\.io\.seg\.br| + tv\.lumbung\.space| + tv\.pirateradio\.social| + tv\.pirati\.cz| + tv\.santic-zombie\.ru| + tv\.undersco\.re| + tv\.zonepl\.net| + tvox\.ru| + twctube\.twc-zone\.eu| + twobeek\.com| + urbanists\.video| + v\.9tail\.net| + v\.basspistol\.org| + v\.j4\.lc| + v\.kisombrella\.top| + v\.koa\.im| + v\.kyaru\.xyz| + v\.lor\.sh| + v\.mkp\.ca| + v\.posm\.gay| + v\.slaycer\.top| + veedeo\.org| + vhs\.absturztau\.be| + vid\.cthos\.dev| + vid\.kinuseka\.us| + vid\.mkp\.ca| + vid\.nocogabriel\.fr| + vid\.norbipeti\.eu| + vid\.northbound\.online| + vid\.ohboii\.de| + vid\.plantplotting\.co\.uk| + vid\.pretok\.tv| + vid\.prometheus\.systems| + vid\.soafen\.love| + vid\.twhtv\.club| + vid\.wildeboer\.net| + video-cave-v2\.de| + video-liberty\.com| + video\.076\.ne\.jp| + video\.1146\.nohost\.me| + video\.9wd\.eu| + video\.abraum\.de| + video\.ados\.accoord\.fr| + video\.amiga-ng\.org| + video\.anartist\.org| + video\.asgardius\.company| + video\.audiovisuel-participatif\.org| + video\.bards\.online| + video\.barkoczy\.social| + video\.benetou\.fr| + video\.beyondwatts\.social| + video\.bgeneric\.net| + video\.bilecik\.edu\.tr| + video\.blast-info\.fr| + video\.bmu\.cloud| + video\.catgirl\.biz| + video\.causa-arcana\.com| + video\.chasmcity\.net| + video\.chbmeyer\.de| + video\.cigliola\.com| + video\.citizen4\.eu| + video\.clumsy\.computer| + video\.cnnumerique\.fr| + video\.cnr\.it| + video\.cnt\.social| + video\.coales\.co| + video\.comune\.trento\.it| + video\.coyp\.us| + video\.csc49\.fr| + video\.davduf\.net| + video\.davejansen\.com| + video\.dlearning\.nl| + video\.dnfi\.no| + video\.dresden\.network| + video\.drgnz\.club| + video\.dudenas\.lt| + video\.eientei\.org| + video\.ellijaymakerspace\.org| + video\.emergeheart\.info| + video\.eradicatinglove\.xyz| + video\.everythingbagel\.me| + video\.extremelycorporate\.ca| + video\.fabiomanganiello\.com| + video\.fedi\.bzh| + video\.fhtagn\.org| + video\.firehawk-systems\.com| + video\.fox-romka\.ru| + video\.fuss\.bz\.it| + video\.glassbeadcollective\.org| + video\.graine-pdl\.org| + video\.gyt\.is| + video\.hainry\.fr| + video\.hardlimit\.com| + video\.hostux\.net| + video\.igem\.org| + video\.infojournal\.fr| + video\.internet-czas-dzialac\.pl| + video\.interru\.io| + video\.ipng\.ch| + video\.ironsysadmin\.com| + video\.islameye\.com| + video\.jacen\.moe| + video\.jadin\.me| + video\.jeffmcbride\.net| + video\.jigmedatse\.com| + video\.kuba-orlik\.name| + video\.lacalligramme\.fr| + video\.lanceurs-alerte\.fr| + video\.laotra\.red| + video\.lapineige\.fr| + video\.laraffinerie\.re| + video\.lavolte\.net| + video\.liberta\.vip| + video\.libreti\.net| + video\.licentia\.net| + video\.linc\.systems| + video\.linux\.it| + video\.linuxtrent\.it| + video\.liveitlive\.show| + video\.lono\.space| + video\.lrose\.de| + video\.lunago\.net| + video\.lundi\.am| + video\.lycee-experimental\.org| + video\.maechler\.cloud| + video\.marcorennmaus\.de| + video\.mass-trespass\.uk| + video\.matomocamp\.org| + video\.medienzentrum-harburg\.de| + video\.mentality\.rip| + video\.metaversum\.wtf| + video\.midreality\.com| + video\.mttv\.it| + video\.mugoreve\.fr| + video\.mxtthxw\.art| + video\.mycrowd\.ca| 
+ video\.niboe\.info| + video\.nogafam\.es| + video\.nstr\.no| + video\.occm\.cc| + video\.off-investigation\.fr| + video\.olos311\.org| + video\.ordinobsolete\.fr| + video\.osvoj\.ru| + video\.ourcommon\.cloud| + video\.ozgurkon\.org| + video\.pcf\.fr| + video\.pcgaldo\.com| + video\.phyrone\.de| + video\.poul\.org| + video\.publicspaces\.net| + video\.pullopen\.xyz| + video\.r3s\.nrw| + video\.rainevixen\.com| + video\.resolutions\.it| + video\.retroedge\.tech| + video\.rhizome\.org| + video\.rlp-media\.de| + video\.rs-einrich\.de| + video\.rubdos\.be| + video\.sadmin\.io| + video\.sftblw\.moe| + video\.shitposter\.club| + video\.simplex-software\.ru| + video\.slipfox\.xyz| + video\.snug\.moe| + video\.software-fuer-engagierte\.de| + video\.soi\.ch| + video\.sonet\.ws| + video\.surazal\.net| + video\.taskcards\.eu| + video\.team-lcbs\.eu| + video\.techforgood\.social| + video\.telemillevaches\.net| + video\.thepolarbear\.co\.uk| + video\.thinkof\.name| + video\.tii\.space| + video\.tkz\.es| + video\.trankil\.info| + video\.triplea\.fr| + video\.tum\.social| + video\.turbo\.chat| + video\.uriopss-pdl\.fr| + video\.ustim\.ru| + video\.ut0pia\.org| + video\.vaku\.org\.ua| + video\.vegafjord\.me| + video\.veloma\.org| + video\.violoncello\.ch| + video\.voidconspiracy\.band| + video\.wakkeren\.nl| + video\.windfluechter\.org| + video\.ziez\.eu| + videos-passages\.huma-num\.fr| + videos\.aadtp\.be| + videos\.ahp-numerique\.fr| + videos\.alamaisondulibre\.org| + videos\.archigny\.net| + videos\.aroaduntraveled\.com| + videos\.b4tech\.org| + videos\.benjaminbrady\.ie| + videos\.bik\.opencloud\.lu| + videos\.cloudron\.io| + videos\.codingotaku\.com| + videos\.coletivos\.org| + videos\.collate\.social| + videos\.danksquad\.org| + videos\.digitaldragons\.eu| + videos\.dromeadhere\.fr| + videos\.explain-it\.org| + videos\.factsonthegroundshow\.com| + videos\.foilen\.com| + videos\.fsci\.in| + videos\.gamercast\.net| + videos\.gianmarco\.gg| + videos\.globenet\.org| + videos\.grafo\.zone| + videos\.hauspie\.fr| + videos\.hush\.is| + videos\.hyphalfusion\.network| + videos\.icum\.to| + videos\.im\.allmendenetz\.de| + videos\.jacksonchen666\.com| + videos\.john-livingston\.fr| + videos\.knazarov\.com| + videos\.kuoushi\.com| + videos\.laliguepaysdelaloire\.org| + videos\.lemouvementassociatif-pdl\.org| + videos\.leslionsfloorball\.fr| + videos\.librescrum\.org| + videos\.mastodont\.cat| + videos\.metus\.ca| + videos\.miolo\.org| + videos\.offroad\.town| + videos\.openmandriva\.org| + videos\.parleur\.net| + videos\.pcorp\.us| + videos\.pop\.eu\.com| + videos\.rampin\.org| + videos\.rauten\.co\.za| + videos\.ritimo\.org| + videos\.sarcasmstardust\.com| + videos\.scanlines\.xyz| + videos\.shmalls\.pw| + videos\.stadtfabrikanten\.org| + videos\.supertuxkart\.net| + videos\.testimonia\.org| + videos\.thinkerview\.com| + videos\.torrenezzi10\.xyz| + videos\.trom\.tf| + videos\.utsukta\.org| + videos\.viorsan\.com| + videos\.wherelinux\.xyz| + videos\.wikilibriste\.fr| + videos\.yesil\.club| + videos\.yeswiki\.net| + videotube\.duckdns\.org| + vids\.capypara\.de| + vids\.roshless\.me| + vids\.stary\.pc\.pl| + vids\.tekdmn\.me| + vidz\.julien\.ovh| + views\.southfox\.me| + virtual-girls-are\.definitely-for\.me| + viste\.pt| + vnchich\.com| + vnop\.org| + vod\.newellijay\.tv| + voluntarytube\.com| + vtr\.chikichiki\.tube| + vulgarisation-informatique\.fr| + watch\.easya\.solutions| + watch\.goodluckgabe\.life| + watch\.ignorance\.eu| + watch\.jimmydore\.com| + watch\.libertaria\.space| + watch\.nuked\.social| 
+ watch\.ocaml\.org| + watch\.thelema\.social| + watch\.tubelab\.video| + web-fellow\.de| + webtv\.vandoeuvre\.net| + wetubevid\.online| + wikileaks\.video| + wiwi\.video| + wow\.such\.disappointment\.fail| + www\.jvideos\.net| + www\.kotikoff\.net| + www\.makertube\.net| + www\.mypeer\.tube| + www\.nadajemy\.com| + www\.neptube\.io| + www\.rocaguinarda\.tv| + www\.vnshow\.net| + xxivproduction\.video| + yt\.orokoro\.ru| + ytube\.retronerd\.at| + zumvideo\.de| + + # from youtube-dl + peertube\.rainbowswingers\.net| + tube\.stanisic\.nl| + peer\.suiri\.us| + medias\.libox\.fr| + videomensoif\.ynh\.fr| + peertube\.travelpandas\.eu| + peertube\.rachetjay\.fr| + peertube\.montecsys\.fr| + tube\.eskuero\.me| + peer\.tube| + peertube\.umeahackerspace\.se| + tube\.nx-pod\.de| + video\.monsieurbidouille\.fr| + tube\.openalgeria\.org| + vid\.lelux\.fi| + video\.anormallostpod\.ovh| + tube\.crapaud-fou\.org| + peertube\.stemy\.me| + lostpod\.space| + exode\.me| + peertube\.snargol\.com| + vis\.ion\.ovh| + videosdulib\.re| + v\.mbius\.io| + videos\.judrey\.eu| + peertube\.osureplayviewer\.xyz| + peertube\.mathieufamily\.ovh| + www\.videos-libr\.es| + fightforinfo\.com| + peertube\.fediverse\.ru| + peertube\.oiseauroch\.fr| + video\.nesven\.eu| + v\.bearvideo\.win| + video\.qoto\.org| + justporn\.cc| + video\.vny\.fr| + peervideo\.club| + tube\.taker\.fr| + peertube\.chantierlibre\.org| + tube\.ipfixe\.info| + tube\.kicou\.info| + tube\.dodsorf\.as| + videobit\.cc| + video\.yukari\.moe| + videos\.elbinario\.net| + hkvideo\.live| + pt\.tux\.tf| + www\.hkvideo\.live| + FIGHTFORINFO\.com| + pt\.765racing\.com| + peertube\.gnumeria\.eu\.org| + nordenmedia\.com| + peertube\.co\.uk| + tube\.darfweb\.eu| + tube\.kalah-france\.org| + 0ch\.in| + vod\.mochi\.academy| + film\.node9\.org| + peertube\.hatthieves\.es| + video\.fitchfamily\.org| + peertube\.ddns\.net| + video\.ifuncle\.kr| + video\.fdlibre\.eu| + tube\.22decembre\.eu| + peertube\.harmoniescreatives\.com| + tube\.fabrigli\.fr| + video\.thedwyers\.co| + video\.bruitbruit\.com| + peertube\.foxfam\.club| + peer\.philoxweb\.be| + videos\.bugs\.social| + peertube\.malbert\.xyz| + peertube\.bilange\.ca| + libretube\.net| + diytelevision\.com| + peertube\.fedilab\.app| + libre\.video| + video\.mstddntfdn\.online| + us\.tv| + peertube\.sl-network\.fr| + peertube\.dynlinux\.io| + peertube\.david\.durieux\.family| + peertube\.linuxrocks\.online| + peerwatch\.xyz| + v\.kretschmann\.social| + tube\.otter\.sh| + yt\.is\.nota\.live| + tube\.dragonpsi\.xyz| + peertube\.boneheadmedia\.com| + videos\.funkwhale\.audio| + watch\.44con\.com| + peertube\.gcaillaut\.fr| + peertube\.icu| + pony\.tube| + spacepub\.space| + tube\.stbr\.io| + v\.mom-gay\.faith| + tube\.port0\.xyz| + peertube\.simounet\.net| + play\.jergefelt\.se| + peertube\.zeteo\.me| + tube\.danq\.me| + peertube\.kerenon\.com| + tube\.fab-l3\.org| + tube\.calculate\.social| + peertube\.mckillop\.org| + tube\.netzspielplatz\.de| + vod\.ksite\.de| + peertube\.laas\.fr| + tube\.govital\.net| + peertube\.stephenson\.cc| + bistule\.nohost\.me| + peertube\.kajalinifi\.de| + video\.ploud\.jp| + video\.omniatv\.com| + peertube\.ffs2play\.fr| + peertube\.leboulaire\.ovh| + peertube\.tronic-studio\.com| + peertube\.public\.cat| + peertube\.metalbanana\.net| + video\.1000i100\.fr| + peertube\.alter-nativ-voll\.de| + tube\.pasa\.tf| + tube\.worldofhauru\.xyz| + pt\.kamp\.site| + peertube\.teleassist\.fr| + videos\.mleduc\.xyz| + conf\.tube| + media\.privacyinternational\.org| + pt\.forty-two\.nl| + 
video\.halle-leaks\.de| + video\.grosskopfgames\.de| + peertube\.schaeferit\.de| + peertube\.jackbot\.fr| + tube\.extinctionrebellion\.fr| + peertube\.f-si\.org| + video\.subak\.ovh| + videos\.koweb\.fr| + peertube\.zergy\.net| + peertube\.roflcopter\.fr| + peertube\.floss-marketing-school\.com| + vloggers\.social| + peertube\.iriseden\.eu| + videos\.ubuntu-paris\.org| + peertube\.mastodon\.host| + armstube\.com| + peertube\.s2s\.video| + peertube\.lol| + tube\.open-plug\.eu| + open\.tube| + peertube\.ch| + peertube\.normandie-libre\.fr| + peertube\.slat\.org| + video\.lacaveatonton\.ovh| + peertube\.uno| + peertube\.servebeer\.com| + peertube\.fedi\.quebec| + tube\.h3z\.jp| + tube\.plus200\.com| + peertube\.eric\.ovh| + tube\.metadocs\.cc| + tube\.unmondemeilleur\.eu| + gouttedeau\.space| + video\.antirep\.net| + nrop\.cant\.at| + tube\.ksl-bmx\.de| + tube\.plaf\.fr| + tube\.tchncs\.de| + video\.devinberg\.com| + hitchtube\.fr| + peertube\.kosebamse\.com| + yunopeertube\.myddns\.me| + peertube\.varney\.fr| + peertube\.anon-kenkai\.com| + tube\.maiti\.info| + tubee\.fr| + videos\.dinofly\.com| + toobnix\.org| + videotape\.me| + voca\.tube| + video\.heromuster\.com| + video\.lemediatv\.fr| + video\.up\.edu\.ph| + balafon\.video| + video\.ivel\.fr| + thickrips\.cloud| + pt\.laurentkruger\.fr| + video\.monarch-pass\.net| + peertube\.artica\.center| + video\.alternanet\.fr| + indymotion\.fr| + fanvid\.stopthatimp\.net| + video\.farci\.org| + v\.lesterpig\.com| + video\.okaris\.de| + tube\.pawelko\.net| + peertube\.mablr\.org| + tube\.fede\.re| + pytu\.be| + evertron\.tv| + devtube\.dev-wiki\.de| + raptube\.antipub\.org| + video\.selea\.se| + peertube\.mygaia\.org| + video\.oh14\.de| + peertube\.livingutopia\.org| + peertube\.the-penguin\.de| + tube\.thechangebook\.org| + tube\.anjara\.eu| + pt\.pube\.tk| + video\.samedi\.pm| + mplayer\.demouliere\.eu| + widemus\.de| + peertube\.me| + peertube\.zapashcanon\.fr| + video\.latavernedejohnjohn\.fr| + peertube\.pcservice46\.fr| + peertube\.mazzonetto\.eu| + video\.irem\.univ-paris-diderot\.fr| + video\.livecchi\.cloud| + alttube\.fr| + video\.coop\.tools| + video\.cabane-libre\.org| + peertube\.openstreetmap\.fr| + videos\.alolise\.org| + irrsinn\.video| + video\.antopie\.org| + scitech\.video| + tube2\.nemsia\.org| + video\.amic37\.fr| + peertube\.freeforge\.eu| + video\.arbitrarion\.com| + video\.datsemultimedia\.com| + stoptrackingus\.tv| + peertube\.ricostrongxxx\.com| + docker\.videos\.lecygnenoir\.info| + peertube\.togart\.de| + tube\.postblue\.info| + videos\.domainepublic\.net| + peertube\.cyber-tribal\.com| + video\.gresille\.org| + peertube\.dsmouse\.net| + cinema\.yunohost\.support| + tube\.theocevaer\.fr| + repro\.video| + tube\.4aem\.com| + quaziinc\.com| + peertube\.metawurst\.space| + videos\.wakapo\.com| + video\.ploud\.fr| + video\.freeradical\.zone| + tube\.valinor\.fr| + refuznik\.video| + pt\.kircheneuenburg\.de| + peertube\.asrun\.eu| + peertube\.lagob\.fr| + videos\.side-ways\.net| + 91video\.online| + video\.valme\.io| + video\.taboulisme\.com| + videos-libr\.es| + tv\.mooh\.fr| + nuage\.acostey\.fr| + video\.monsieur-a\.fr| + peertube\.librelois\.fr| + videos\.pair2jeux\.tube| + videos\.pueseso\.club| + peer\.mathdacloud\.ovh| + media\.assassinate-you\.net| + vidcommons\.org| + ptube\.rousset\.nom\.fr| + tube\.cyano\.at| + videos\.squat\.net| + video\.iphodase\.fr| + peertube\.makotoworkshop\.org| + peertube\.serveur\.slv-valbonne\.fr| + vault\.mle\.party| + hostyour\.tv| + videos\.hack2g2\.fr| + libre\.tube| + 
pire\.artisanlogiciel\.net| + videos\.numerique-en-commun\.fr| + video\.netsyms\.com| + video\.die-partei\.social| + video\.writeas\.org| + peertube\.swarm\.solvingmaz\.es| + tube\.pericoloso\.ovh| + watching\.cypherpunk\.observer| + videos\.adhocmusic\.com| + tube\.rfc1149\.net| + peertube\.librelabucm\.org| + videos\.numericoop\.fr| + peertube\.koehn\.com| + peertube\.anarchmusicall\.net| + tube\.kampftoast\.de| + vid\.y-y\.li| + peertube\.xtenz\.xyz| + diode\.zone| + tube\.egf\.mn| + peertube\.nomagic\.uk| + visionon\.tv| + videos\.koumoul\.com| + video\.rastapuls\.com| + video\.mantlepro\.com| + video\.deadsuperhero\.com| + peertube\.musicstudio\.pro| + peertube\.we-keys\.fr| + artitube\.artifaille\.fr| + peertube\.ethernia\.net| + tube\.midov\.pl| + peertube\.fr| + watch\.snoot\.tube| + peertube\.donnadieu\.fr| + argos\.aquilenet\.fr| + tube\.nemsia\.org| + tube\.bruniau\.net| + videos\.darckoune\.moe| + tube\.traydent\.info| + dev\.videos\.lecygnenoir\.info| + peertube\.nayya\.org| + peertube\.live| + peertube\.mofgao\.space| + video\.lequerrec\.eu| + peertube\.amicale\.net| + aperi\.tube| + tube\.ac-lyon\.fr| + video\.lw1\.at| + www\.yiny\.org| + videos\.pofilo\.fr| + tube\.lou\.lt| + choob\.h\.etbus\.ch| + tube\.hoga\.fr| + peertube\.heberge\.fr| + video\.obermui\.de| + videos\.cloudfrancois\.fr| + betamax\.video| + video\.typica\.us| + tube\.piweb\.be| + video\.blender\.org| + peertube\.cat| + tube\.kdy\.ch| + pe\.ertu\.be| + peertube\.social| + videos\.lescommuns\.org| + tv\.datamol\.org| + videonaute\.fr| + dialup\.express| + peertube\.nogafa\.org| + megatube\.lilomoino\.fr| + peertube\.tamanoir\.foucry\.net| + peertube\.devosi\.org| + peertube\.1312\.media| + tube\.bootlicker\.party| + skeptikon\.fr| + video\.blueline\.mg| + tube\.homecomputing\.fr| + tube\.ouahpiti\.info| + video\.tedomum\.net| + video\.g3l\.org| + fontube\.fr| + peertube\.gaialabs\.ch| + tube\.kher\.nl| + peertube\.qtg\.fr| + video\.migennes\.net| + tube\.p2p\.legal| + troll\.tv| + videos\.iut-orsay\.fr| + peertube\.solidev\.net| + videos\.cemea\.org| + video\.passageenseine\.fr| + videos\.festivalparminous\.org| + peertube\.touhoppai\.moe| + sikke\.fi| + peer\.hostux\.social| + share\.tube| + peertube\.walkingmountains\.fr| + videos\.benpro\.fr| + peertube\.parleur\.net| + peertube\.heraut\.eu| + tube\.aquilenet\.fr| + peertube\.gegeweb\.eu| + framatube\.org| + thinkerview\.video| + tube\.conferences-gesticulees\.net| + peertube\.datagueule\.tv| + video\.lqdn\.fr| + tube\.mochi\.academy| + media\.zat\.im| + video\.colibris-outilslibres\.org| + tube\.svnet\.fr| + peertube\.video| + peertube2\.cpy\.re| + peertube3\.cpy\.re| + videos\.tcit\.fr| + peertube\.cpy\.re| + canard\.tube + )''' + _UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' + _API_BASE = 'https://%s/api/v1/videos/%s/%s' + _VALID_URL = r'''(?x) + (?: + peertube:(?P<host>[^:]+):| + https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/ + ) + (?P<id>%s) + ''' % (_INSTANCES_RE, _UUID_RE) + _EMBED_REGEX = [r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//{_INSTANCES_RE}/videos/embed/{cls._UUID_RE})'''] + _TESTS = [{ + 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d', + 'md5': '8563064d245a4be5705bddb22bb00a28', + 'info_dict': { + 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d', + 'ext': 'mp4', + 'title': 'What is PeerTube?', + 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10', + 'thumbnail': r're:https?://.*\.(?:jpg|png)', + 'timestamp': 1538391166, + 'upload_date': '20181001', +
'uploader': 'Framasoft', + 'uploader_id': '3', + 'uploader_url': 'https://framatube.org/accounts/framasoft', + 'channel': 'A propos de PeerTube', + 'channel_id': '2215', + 'channel_url': 'https://framatube.org/video-channels/joinpeertube', + 'language': 'en', + 'license': 'Attribution - Share Alike', + 'duration': 113, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'tags': ['framasoft', 'peertube'], + 'categories': ['Science & Technology'], + } + }, { + 'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e', + 'info_dict': { + 'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e', + 'ext': 'mp4', + 'title': 'E2E tests', + 'uploader_id': '37855', + 'timestamp': 1589276219, + 'upload_date': '20200512', + 'uploader': 'chocobozzz', + } + }, { + 'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd', + 'info_dict': { + 'id': '3fbif9S3WmtTP8gGsC5HBd', + 'ext': 'mp4', + 'title': 'E2E tests', + 'uploader_id': '37855', + 'timestamp': 1589276219, + 'upload_date': '20200512', + 'uploader': 'chocobozzz', + }, + }, { + 'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd', + 'info_dict': { + 'id': '3fbif9S3WmtTP8gGsC5HBd', + 'ext': 'mp4', + 'title': 'E2E tests', + 'uploader_id': '37855', + 'timestamp': 1589276219, + 'upload_date': '20200512', + 'uploader': 'chocobozzz', + }, + }, { + # Issue #26002 + 'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc', + 'info_dict': { + 'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc', + 'ext': 'mp4', + 'title': 'Dot matrix printer shell demo', + 'uploader_id': '3', + 'timestamp': 1587401293, + 'upload_date': '20200420', + 'uploader': 'Drew DeVault', + } + }, { + 'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', + 'only_matching': True, + }, { + # nsfw + 'url': 'https://vod.ksite.de/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39', + 'only_matching': True, + }, { + 'url': 'https://vod.ksite.de/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7', + 'only_matching': True, + }, { + 'url': 'https://peertube.tv/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8', + 'only_matching': True, + }, { + 'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205', + 'only_matching': True, + }] + + @staticmethod + def _extract_peertube_url(webpage, source_url): + mobj = re.match( + r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|w)/(?P<id>%s)' + % PeerTubeIE._UUID_RE, source_url) + if mobj and any(p in webpage for p in ( + 'meta property="og:platform" content="PeerTube"', + '<title>PeerTube<', + 'There will be other non JS-based clients to access PeerTube', + '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')): + return 'peertube:%s:%s' % mobj.group('host', 'id') + + @classmethod + def _extract_embed_urls(cls, url, webpage): + embeds = tuple(super()._extract_embed_urls(url, webpage)) + if embeds: + return embeds + + peertube_url = cls._extract_peertube_url(webpage, url) + if peertube_url: + return [peertube_url] + + def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True): + return self._download_json( + self._API_BASE % (host, video_id, path), video_id, + note=note, errnote=errnote, fatal=fatal) + + def _get_subtitles(self, host, video_id): + captions = self._call_api( + host, video_id, 'captions', note='Downloading captions JSON', + fatal=False) + if not isinstance(captions, dict): + return + data = captions.get('data') + if not isinstance(data, list): + return + subtitles = {} + for e in data: + language_id = try_get(e,
lambda x: x['language']['id'], compat_str) + caption_url = urljoin('https://%s' % host, e.get('captionPath')) + if not caption_url: + continue + subtitles.setdefault(language_id or 'en', []).append({ + 'url': caption_url, + }) + return subtitles + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + host = mobj.group('host') or mobj.group('host_2') + video_id = mobj.group('id') + + video = self._call_api( + host, video_id, '', note='Downloading video JSON') + + title = video['name'] + + formats = [] + files = video.get('files') or [] + for playlist in (video.get('streamingPlaylists') or []): + if not isinstance(playlist, dict): + continue + playlist_files = playlist.get('files') + if not (playlist_files and isinstance(playlist_files, list)): + continue + files.extend(playlist_files) + for file_ in files: + if not isinstance(file_, dict): + continue + file_url = url_or_none(file_.get('fileUrl')) + if not file_url: + continue + file_size = int_or_none(file_.get('size')) + format_id = try_get( + file_, lambda x: x['resolution']['label'], compat_str) + f = parse_resolution(format_id) + f.update({ + 'url': file_url, + 'format_id': format_id, + 'filesize': file_size, + }) + if format_id == '0p': + f['vcodec'] = 'none' + else: + f['fps'] = int_or_none(file_.get('fps')) + formats.append(f) + + description = video.get('description') + if description and len(description) >= 250: + # description is shortened + full_description = self._call_api( + host, video_id, 'description', note='Downloading description JSON', + fatal=False) + + if isinstance(full_description, dict): + description = str_or_none(full_description.get('description')) or description + + subtitles = self.extract_subtitles(host, video_id) + + def data(section, field, type_): + return try_get(video, lambda x: x[section][field], type_) + + def account_data(field, type_): + return data('account', field, type_) + + def channel_data(field, type_): + return data('channel', field, type_) + + category = data('category', 'label', compat_str) + categories = [category] if category else None + + nsfw = video.get('nsfw') + if isinstance(nsfw, bool): + age_limit = 18 if nsfw else 0 + else: + age_limit = None + + webpage_url = 'https://%s/videos/watch/%s' % (host, video_id) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')), + 'timestamp': unified_timestamp(video.get('publishedAt')), + 'uploader': account_data('displayName', compat_str), + 'uploader_id': str_or_none(account_data('id', int)), + 'uploader_url': url_or_none(account_data('url', compat_str)), + 'channel': channel_data('displayName', compat_str), + 'channel_id': str_or_none(channel_data('id', int)), + 'channel_url': url_or_none(channel_data('url', compat_str)), + 'language': data('language', 'id', compat_str), + 'license': data('licence', 'label', compat_str), + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('views')), + 'like_count': int_or_none(video.get('likes')), + 'dislike_count': int_or_none(video.get('dislikes')), + 'age_limit': age_limit, + 'tags': try_get(video, lambda x: x['tags'], list), + 'categories': categories, + 'formats': formats, + 'subtitles': subtitles, + 'webpage_url': webpage_url, + } + + +class PeerTubePlaylistIE(InfoExtractor): + IE_NAME = 'PeerTube:Playlist' + _TYPES = { + 'a': 'accounts', + 'c': 'video-channels', + 'w/p': 'video-playlists', + } + _VALID_URL = r'''(?x) + https?://(?P<host>%s)/(?P<type>(?:%s))/ + (?P<id>[^/]+) +
''' % (PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys())) + _TESTS = [{ + 'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12', + 'info_dict': { + 'id': 'hFdJoTuyhNJVa1cDWd1d12', + 'description': 'Diversas palestras do Richard Stallman no Brasil.', + 'title': 'Richard Stallman no Brasil', + 'timestamp': 1599676222, + }, + 'playlist_mincount': 9, + }, { + 'url': 'https://peertube2.cpy.re/a/chocobozzz/videos', + 'info_dict': { + 'id': 'chocobozzz', + 'timestamp': 1553874564, + 'title': 'chocobozzz', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos', + 'info_dict': { + 'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8', + 'timestamp': 1519917377, + 'title': 'Les vidéos de Framasoft', + }, + 'playlist_mincount': 345, + }, { + 'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos', + 'info_dict': { + 'id': 'blender_open_movies@video.blender.org', + 'timestamp': 1542287810, + 'title': 'Official Blender Open Movies', + }, + 'playlist_mincount': 11, + }] + _API_BASE = 'https://%s/api/v1/%s/%s%s' + _PAGE_SIZE = 30 + + def call_api(self, host, name, path, base, **kwargs): + return self._download_json( + self._API_BASE % (host, base, name, path), name, **kwargs) + + def fetch_page(self, host, id, type, page): + page += 1 + video_data = self.call_api( + host, id, + f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}&nsfw=both', + type, note=f'Downloading page {page}').get('data', []) + for video in video_data: + shortUUID = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID']) + video_title = video.get('name') or try_get(video, lambda x: x['video']['name']) + yield self.url_result( + f'https://{host}/w/{shortUUID}', PeerTubeIE.ie_key(), + video_id=shortUUID, video_title=video_title) + + def _extract_playlist(self, host, type, id): + info = self.call_api(host, id, '', type, note='Downloading playlist information', fatal=False) + + playlist_title = info.get('displayName') + playlist_description = info.get('description') + playlist_timestamp = unified_timestamp(info.get('createdAt')) + channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName') + channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id') + thumbnail = format_field(info, 'thumbnailPath', f'https://{host}%s') + + entries = OnDemandPagedList(functools.partial( + self.fetch_page, host, id, type), self._PAGE_SIZE) + + return self.playlist_result( + entries, id, playlist_title, playlist_description, + timestamp=playlist_timestamp, channel=channel, channel_id=channel_id, thumbnail=thumbnail) + + def _real_extract(self, url): + type, host, id = self._match_valid_url(url).group('type', 'host', 'id') + type = self._TYPES[type] + return self._extract_playlist(host, type, id) diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py new file mode 100644 index 0000000..a709e21 --- /dev/null +++ b/yt_dlp/extractor/peertv.py @@ -0,0 +1,52 @@ +from .common import InfoExtractor +from ..utils import js_to_json + + +class PeerTVIE(InfoExtractor): + IE_NAME = 'peer.tv' + _VALID_URL = r'https?://(?:www\.)?peer\.tv/(?:de|it|en)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.peer.tv/de/841', + 'info_dict': { + 'id': '841', + 'ext': 'mp4', + 'title': 'Die Brunnenburg', + 'description': 'md5:4395f6142b090338340ab88a3aae24ed', + }, + }, { + 'url': 'https://www.peer.tv/it/404', + 'info_dict': { + 'id': '404', + 'ext': 'mp4', + 'title': 'Cascate di 
ghiaccio in Val Gardena', + 'description': 'md5:e8e5907f236171842674e8090e3577b8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_key = self._html_search_regex(r'player\.peer\.tv/js/([a-zA-Z0-9]+)', webpage, 'video key') + + js = self._download_webpage(f'https://player.peer.tv/js/{video_key}/', video_id, + headers={'Referer': 'https://www.peer.tv/'}, note='Downloading session id') + + session_id = self._search_regex(r'["\']session_id["\']:\s*["\']([a-zA-Z0-9]+)["\']', js, 'session id') + + player_webpage = self._download_webpage( + f'https://player.peer.tv/jsc/{video_key}/{session_id}?jsr=aHR0cHM6Ly93d3cucGVlci50di9kZS84NDE=&cs=UTF-8&mq=2&ua=0&webm=p&mp4=p&hls=1', + video_id, note='Downloading player webpage') + + m3u8_url = self._search_regex(r'["\']playlist_url["\']:\s*(["\'][^"\']+["\'])', player_webpage, 'm3u8 url') + m3u8_url = self._parse_json(m3u8_url, video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') + + return { + 'id': video_id, + 'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '), + 'formats': formats, + 'description': self._html_search_meta(('og:description', 'description'), webpage), + 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage) + } diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py new file mode 100644 index 0000000..7864299 --- /dev/null +++ b/yt_dlp/extractor/peloton.py @@ -0,0 +1,215 @@ +import json +import re +import urllib.parse + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + float_or_none, + str_or_none, + traverse_obj, + url_or_none, +) + + +class PelotonIE(InfoExtractor): + IE_NAME = 'peloton' + _NETRC_MACHINE = 'peloton' + _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)' + _TESTS = [{ + 'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86', + 'info_dict': { + 'id': '0e9653eb53544eeb881298c8d7a87b86', + 'title': '20 min Chest & Back Strength', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.+\.jpg', + 'description': 'md5:fcd5be9b9eda0194b470e13219050a66', + 'creator': 'Chase Tucker', + 'release_timestamp': 1556141400, + 'timestamp': 1556141400, + 'upload_date': '20190424', + 'duration': 1389, + 'categories': ['Strength'], + 'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'], + 'is_live': False, + 'chapters': 'count:1', + 'subtitles': {'en': [{ + 'url': r're:^https?://.+', + 'ext': 'vtt' + }]}, + }, 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + }, { + 'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8', + 'info_dict': { + 'id': '26603d53d6bb4de1b340514864a6a6a8', + 'title': '30 min Earth Day Run', + 'ext': 'm4a', + 'thumbnail': r're:https://.+\.jpg', + 'description': 'md5:adc065a073934d7ee0475d217afe0c3d', + 'creator': 'Selena Samuela', + 'release_timestamp': 1587567600, + 'timestamp': 1587567600, + 'upload_date': '20200422', + 'duration': 1802, + 'categories': ['Running'], + 'is_live': False, + 'chapters': 'count:3' + }, 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + }] + + _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s' + + def _start_session(self, video_id): + self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session') + + def _login(self, 
video_id): + username, password = self._get_login_info() + if not (username and password): + self.raise_login_required() + try: + self._download_json( + 'https://api.onepeloton.com/auth/login', video_id, note='Logging in', + data=json.dumps({ + 'username_or_email': username, + 'password': password, + 'with_pubsub': False + }).encode(), + headers={'Content-Type': 'application/json', 'User-Agent': 'web'}) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + json_string = self._webpage_read_content(e.cause.response, None, video_id) + res = self._parse_json(json_string, video_id) + raise ExtractorError(res['message'], expected=res['message'] == 'Login failed') + else: + raise + + def _get_token(self, video_id): + try: + subscription = self._download_json( + 'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token', + data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + json_string = self._webpage_read_content(e.cause.response, None, video_id) + res = self._parse_json(json_string, video_id) + raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached') + else: + raise + return subscription['token'] + + def _real_extract(self, url): + video_id = self._match_id(url) + try: + self._start_session(video_id) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + self._login(video_id) + self._start_session(video_id) + else: + raise + + metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id) + ride_data = metadata.get('ride') + if not ride_data: + raise ExtractorError('Missing stream metadata') + token = self._get_token(video_id) + + is_live = False + if ride_data.get('content_format') == 'audio': + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), urllib.parse.quote(token)) + formats = [{ + 'url': url, + 'ext': 'm4a', + 'format_id': 'audio', + 'vcodec': 'none', + }] + subtitles = {} + else: + if ride_data.get('vod_stream_url'): + url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % ( + ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]), + ride_data['vod_stream_url'], + urllib.parse.quote(urllib.parse.quote(token))) + elif ride_data.get('live_stream_url'): + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token)) + is_live = True + else: + raise ExtractorError('Missing video URL') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') + + if metadata.get('instructor_cues'): + subtitles['cues'] = [{ + 'data': json.dumps(metadata.get('instructor_cues')), + 'ext': 'json' + }] + + category = ride_data.get('fitness_discipline_display_name') + chapters = [{ + 'start_time': segment.get('start_time_offset'), + 'end_time': segment.get('start_time_offset') + segment.get('length'), + 'title': segment.get('name') + } for segment in traverse_obj(metadata, ('segments', 'segment_list'))] + + return { + 'id': video_id, + 'title': ride_data.get('title'), + 'formats': formats, + 'thumbnail': url_or_none(ride_data.get('image_url')), + 'description': str_or_none(ride_data.get('description')), + 'creator': traverse_obj(ride_data, ('instructor', 'name')), + 'release_timestamp': 
ride_data.get('original_air_time'), + 'timestamp': ride_data.get('original_air_time'), + 'subtitles': subtitles, + 'duration': float_or_none(ride_data.get('length')), + 'categories': [category] if category else None, + 'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')), + 'is_live': is_live, + 'chapters': chapters + } + + +class PelotonLiveIE(InfoExtractor): + IE_NAME = 'peloton:live' + IE_DESC = 'Peloton Live' + _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)' + _TEST = { + 'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b', + 'info_dict': { + 'id': '32edc92d28044be5bf6c7b6f1f8d1cbc', + 'title': '30 min HIIT Ride: Live from Home', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.+\.png', + 'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817', + 'creator': 'Alex Toussaint', + 'release_timestamp': 1587736620, + 'timestamp': 1587736620, + 'upload_date': '20200424', + 'duration': 2014, + 'categories': ['Cycling'], + 'is_live': False, + 'chapters': 'count:3' + }, + 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + } + + def _real_extract(self, url): + workout_id = self._match_id(url) + peloton = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id) + + if peloton.get('ride_id'): + if not peloton.get('is_live') or peloton.get('is_encore') or peloton.get('status') != 'PRE_START': + return self.url_result('https://members.onepeloton.com/classes/player/%s' % peloton['ride_id']) + else: + raise ExtractorError('Ride has not started', expected=True) + else: + raise ExtractorError('Missing video ID') diff --git a/yt_dlp/extractor/performgroup.py b/yt_dlp/extractor/performgroup.py new file mode 100644 index 0000000..f4d7f22 --- /dev/null +++ b/yt_dlp/extractor/performgroup.py @@ -0,0 +1,77 @@ +from .common import InfoExtractor +from ..utils import int_or_none + + +class PerformGroupIE(InfoExtractor): + _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})' + _TESTS = [{ + # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html + 'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab', + 'md5': '259cb03d142e2e52471e8837ecacb29f', + 'info_dict': { + 'id': 'xgrwobuzumes1lwjxtcdpwgxd', + 'ext': 'mp4', + 'title': 'Liga MX: Keine Einsicht nach Horrorfoul', + 'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b', + 'timestamp': 1511533477, + 'upload_date': '20171124', + } + }] + + def _call_api(self, service, auth_token, content_id, referer_url): + return self._download_json( + 'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id), + content_id, headers={ + 'Referer': referer_url, + 'Origin': 'http://player.performgroup.com', + }, query={ + '_fmt': 'json', + }) + + def _real_extract(self, url): + player_id, auth_token = self._match_valid_url(url).groups() + bootstrap = self._call_api('bootstrap', auth_token, player_id, url) + video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0] + video_id = video['uuid'] + vod = self._call_api('vod', auth_token, video_id, url) + media = vod['videos']['video'][0]['media'] + + formats = [] + hls_url = media.get('hls', {}).get('url') + if hls_url: + formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + hds_url = media.get('hds', {}).get('url') + if 
hds_url: + formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False)) + + for c in media.get('content', []): + c_url = c.get('url') + if not c_url: + continue + tbr = int_or_none(c.get('bitrate'), 1000) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': c_url, + 'tbr': tbr, + 'width': int_or_none(c.get('width')), + 'height': int_or_none(c.get('height')), + 'filesize': int_or_none(c.get('fileSize')), + 'vcodec': c.get('type'), + 'fps': int_or_none(c.get('videoFrameRate')), + 'vbr': int_or_none(c.get('videoRate'), 1000), + 'abr': int_or_none(c.get('audioRate'), 1000), + }) + + return { + 'id': video_id, + 'title': video['title'], + 'description': video.get('description'), + 'thumbnail': video.get('poster'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': int_or_none(video.get('publishedTime'), 1000), + 'formats': formats, + } diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py new file mode 100644 index 0000000..d2351df --- /dev/null +++ b/yt_dlp/extractor/periscope.py @@ -0,0 +1,188 @@ +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + unescapeHTML, +) +from ..utils.traversal import traverse_obj + + +class PeriscopeBaseIE(InfoExtractor): + _M3U8_HEADERS = { + 'Referer': 'https://www.periscope.tv/' + } + + def _call_api(self, method, query, item_id): + return self._download_json( + 'https://api.periscope.tv/api/v2/%s' % method, + item_id, query=query) + + def _parse_broadcast_data(self, broadcast, video_id): + title = broadcast.get('status') or 'Periscope Broadcast' + uploader = broadcast.get('user_display_name') or broadcast.get('username') + title = '%s - %s' % (uploader, title) if uploader else title + thumbnails = [{ + 'url': broadcast[image], + } for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)] + + return { + 'id': broadcast.get('id') or video_id, + 'title': title, + 'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none( + broadcast.get('created_at_ms'), scale=1000), + 'release_timestamp': int_or_none(broadcast.get('scheduled_start_ms'), scale=1000), + 'uploader': uploader, + 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), + 'thumbnails': thumbnails, + 'view_count': int_or_none(broadcast.get('total_watched')), + 'concurrent_view_count': int_or_none(broadcast.get('total_watching')), + 'tags': broadcast.get('tags'), + 'live_status': { + 'running': 'is_live', + 'not_started': 'is_upcoming', + }.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live' + } + + @staticmethod + def _extract_common_format_info(broadcast): + return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height')) + + @staticmethod + def _add_width_and_height(f, width, height): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + + def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True): + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', + entry_protocol='m3u8_native' + if state in ('ended', 'timed_out') else 'm3u8', + m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS) + if len(m3u8_formats) == 1: + self._add_width_and_height(m3u8_formats[0], width, height) + for f in m3u8_formats: + f.setdefault('http_headers', {}).update(self._M3U8_HEADERS) + return m3u8_formats + + +class 
PeriscopeIE(PeriscopeBaseIE): + IE_DESC = 'Periscope' + IE_NAME = 'periscope' + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' + _EMBED_REGEX = [r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1'] + # Live example URLs can be found at https://www.periscope.tv/ + _TESTS = [{ + 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', + 'md5': '65b57957972e503fcbbaeed8f4fa04ca', + 'info_dict': { + 'id': '56102209', + 'ext': 'mp4', + 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗', + 'timestamp': 1438978559, + 'upload_date': '20150807', + 'uploader': 'Bec Boop', + 'uploader_id': '1465763', + }, + 'skip': 'Expires in 24 hours', + }, { + 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', + 'only_matching': True, + }, { + 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX', + 'only_matching': True, + }] + + def _real_extract(self, url): + token = self._match_id(url) + + stream = self._call_api( + 'accessVideoPublic', {'broadcast_id': token}, token) + + broadcast = stream['broadcast'] + info = self._parse_broadcast_data(broadcast, token) + + state = broadcast.get('state').lower() + width = int_or_none(broadcast.get('width')) + height = int_or_none(broadcast.get('height')) + + def add_width_and_height(f): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + + video_urls = set() + formats = [] + for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): + video_url = stream.get(format_id + '_url') + if not video_url or video_url in video_urls: + continue + video_urls.add(video_url) + if format_id != 'rtmp': + m3u8_formats = self._extract_pscp_m3u8_formats( + video_url, token, format_id, state, width, height, False) + formats.extend(m3u8_formats) + continue + rtmp_format = { + 'url': video_url, + 'ext': 'flv', + } + add_width_and_height(rtmp_format) + formats.append(rtmp_format) + + info['formats'] = formats + return info + + +class PeriscopeUserIE(PeriscopeBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$' + IE_DESC = 'Periscope user videos' + IE_NAME = 'periscope:user' + + _TEST = { + 'url': 'https://www.periscope.tv/LularoeHusbandMike/', + 'info_dict': { + 'id': 'LularoeHusbandMike', + 'title': 'LULAROE HUSBAND MIKE', + 'description': 'md5:6cf4ec8047768098da58e446e82c82f0', + }, + # Periscope only shows videos in the last 24 hours, so it's possible to + # get 0 videos + 'playlist_mincount': 0, + } + + def _real_extract(self, url): + user_name = self._match_id(url) + + webpage = self._download_webpage(url, user_name) + + data_store = self._parse_json( + unescapeHTML(self._search_regex( + r'data-store=(["\'])(?P<data>.+?)\1', + webpage, 'data store', default='{}', group='data')), + user_name) + + user = list(data_store['UserCache']['users'].values())[0]['user'] + user_id = user['id'] + session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] + + broadcasts = self._call_api( + 'getUserBroadcastsPublic', + {'user_id': user_id, 'session_id': session_id}, + user_name)['broadcasts'] + + broadcast_ids = [ + broadcast['id'] for broadcast in broadcasts if broadcast.get('id')] + + title = user.get('display_name') or user.get('username') or user_name + 
description = user.get('description') + + entries = [ + self.url_result( + 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id)) + for broadcast_id in broadcast_ids] + + return self.playlist_result(entries, user_id, title, description) diff --git a/yt_dlp/extractor/pgatour.py b/yt_dlp/extractor/pgatour.py new file mode 100644 index 0000000..36c2c62 --- /dev/null +++ b/yt_dlp/extractor/pgatour.py @@ -0,0 +1,47 @@ +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor + + +class PGATourIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1', + 'info_dict': { + 'id': '6322447785112', + 'ext': 'mp4', + 'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1', + 'uploader_id': '6116716431001', + 'upload_date': '20230312', + 'timestamp': 1678653136, + 'duration': 20.011, + 'thumbnail': r're:^https://.+\.jpg', + 'tags': 'count:7', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday', + 'info_dict': { + 'id': '6322506425112', + 'ext': 'mp4', + 'title': 'Follow THE PLAYERS trophy on Championship Sunday', + 'description': 'md5:4d29e4bdfa03694a0ebfd08950398568', + 'uploader_id': '6082840763001', + 'upload_date': '20230313', + 'timestamp': 1678739835, + 'duration': 123.435, + 'thumbnail': r're:^https://.+\.jpg', + 'tags': 'count:8', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id, is_tourcast = self._match_valid_url(url).group('id', 'tc') + + # From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js + account_id = '6116716431001' if is_tourcast else '6082840763001' + player_id = 'Vsd5Umu8r' if is_tourcast else 'FWIBYMBPj' + + return self.url_result( + f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}', + BrightcoveNewIE) diff --git a/yt_dlp/extractor/philharmoniedeparis.py b/yt_dlp/extractor/philharmoniedeparis.py new file mode 100644 index 0000000..e8494a0 --- /dev/null +++ b/yt_dlp/extractor/philharmoniedeparis.py @@ -0,0 +1,97 @@ +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import try_get + + +class PhilharmonieDeParisIE(InfoExtractor): + IE_DESC = 'Philharmonie de Paris' + _VALID_URL = r'''(?x) + https?:// + (?: + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)| + pad\.philharmoniedeparis\.fr/(?:doc/CIMU/|player\.aspx\?id=)| + philharmoniedeparis\.fr/fr/live/concert/| + otoplayer\.philharmoniedeparis\.fr/fr/embed/ + ) + (?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1129666-danses-symphoniques', + 'md5': '24bdb7e86c200c107680e1f7770330ae', + 'info_dict': { + 'id': '1129666', + 'ext': 'mp4', + 'title': 'Danses symphoniques. Orchestre symphonique Divertimento - Zahia Ziouani. 
Bizet, de Falla, Stravinski, Moussorgski, Saint-Saëns', + }, + }, { + 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1032066-akademie-fur-alte-musik-berlin-rias-kammerchor-rene-jacobs-passion-selon-saint-jean-de-johann', + 'info_dict': { + 'id': '1032066', + 'title': 'Akademie für alte Musik Berlin, Rias Kammerchor, René Jacobs : Passion selon saint Jean de Johann Sebastian Bach', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://philharmoniedeparis.fr/fr/live/concert/1030324-orchestre-philharmonique-de-radio-france-myung-whun-chung-renaud-capucon-pascal-dusapin-johannes', + 'only_matching': True, + }, { + 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', + 'only_matching': True, + }, { + 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR', + 'only_matching': True, + }, { + 'url': 'https://otoplayer.philharmoniedeparis.fr/fr/embed/1098406?lang=fr-FR', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + config = self._download_json( + 'https://otoplayer.philharmoniedeparis.fr/fr/config/%s.json' % video_id, video_id, query={ + 'id': video_id, + 'lang': 'fr-FR', + }) + + def extract_entry(source): + if not isinstance(source, dict): + return + title = source.get('title') + if not title: + return + files = source.get('files') + if not isinstance(files, dict): + return + format_urls = set() + formats = [] + for format_id in ('mobile', 'desktop'): + format_url = try_get( + files, lambda x: x[format_id]['file'], compat_str) + if not format_url or format_url in format_urls: + continue + format_urls.add(format_url) + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + if not formats and not self.get_param('ignore_no_formats'): + return + return { + 'title': title, + 'formats': formats, + 'thumbnail': files.get('thumbnail'), + } + info = extract_entry(config) + if info: + info.update({ + 'id': video_id, + }) + return info + entries = [] + for num, chapter in enumerate(config['chapters'], start=1): + entry = extract_entry(chapter) + if entry is None: + continue + entry['id'] = '%s-%d' % (video_id, num) + entries.append(entry) + + return self.playlist_result(entries, video_id, config.get('title')) diff --git a/yt_dlp/extractor/phoenix.py b/yt_dlp/extractor/phoenix.py new file mode 100644 index 0000000..5fa133a --- /dev/null +++ b/yt_dlp/extractor/phoenix.py @@ -0,0 +1,130 @@ +import re + +from .youtube import YoutubeIE +from .zdf import ZDFBaseIE +from ..compat import compat_str +from ..utils import ( + int_or_none, + merge_dicts, + try_get, + unified_timestamp, + urljoin, +) + + +class PhoenixIE(ZDFBaseIE): + IE_NAME = 'phoenix.de' + _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html' + _TESTS = [{ + # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html + 'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html', + 'md5': '34ec321e7eb34231fd88616c65c92db0', + 'info_dict': { + 'id': '210222_phx_nachgehakt_corona_protest', + 'ext': 'mp4', + 'title': 'Wohin führt der Protest in der Pandemie?', + 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', + 'duration': 1691, + 'timestamp': 1613902500, + 'upload_date': '20210221', + 'uploader': 'Phoenix', + 'series': 'corona nachgehakt', 
+ 'episode': 'Wohin führt der Protest in der Pandemie?', + }, + }, { + # Youtube embed + 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html', + 'info_dict': { + 'id': 'hMQtqFYjomk', + 'ext': 'mp4', + 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?', + 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd', + 'duration': 3509, + 'upload_date': '20201219', + 'uploader': 'phoenix', + 'uploader_id': 'phoenix', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html', + 'only_matching': True, + }, { + # no media + 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html', + 'only_matching': True, + }, { + # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html + 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche', + 'only_matching': True, + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + + article = self._download_json( + 'https://www.phoenix.de/response/id/%s' % article_id, article_id, + 'Downloading article JSON') + + video = article['absaetze'][0] + title = video.get('titel') or article.get('subtitel') + + if video.get('typ') == 'video-youtube': + video_id = video['id'] + return self.url_result( + video_id, ie=YoutubeIE.ie_key(), video_id=video_id, + video_title=title) + + video_id = compat_str(video.get('basename') or video.get('content')) + + details = self._download_json( + 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', + video_id, 'Downloading details JSON', query={ + 'ak': 'web', + 'ptmd': 'true', + 'id': video_id, + 'profile': 'player2', + }) + + title = title or details['title'] + content_id = details['tracking']['nielsen']['content']['assetid'] + + info = self._extract_ptmd( + 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id, + content_id, None, url) + + duration = int_or_none(try_get( + details, lambda x: x['tracking']['nielsen']['content']['length'])) + timestamp = unified_timestamp(details.get('editorialDate')) + series = try_get( + details, lambda x: x['tracking']['nielsen']['content']['program'], + compat_str) + episode = title if details.get('contentType') == 'episode' else None + + thumbnails = [] + teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {} + for thumbnail_key, thumbnail_url in teaser_images.items(): + thumbnail_url = urljoin(url, thumbnail_url) + if not thumbnail_url: + continue + thumbnail = { + 'url': thumbnail_url, + } + m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) + if m: + thumbnail['width'] = int(m.group(1)) + thumbnail['height'] = int(m.group(2)) + thumbnails.append(thumbnail) + + return merge_dicts(info, { + 'id': content_id, + 'title': title, + 'description': details.get('leadParagraph'), + 'duration': duration, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'uploader': details.get('tvService'), + 'series': series, + 'episode': episode, + }) diff --git a/yt_dlp/extractor/photobucket.py b/yt_dlp/extractor/photobucket.py new file mode 100644 index 0000000..71e9a48 --- /dev/null +++ b/yt_dlp/extractor/photobucket.py @@ -0,0 +1,43 @@ +import json + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class PhotobucketIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' + _TEST = { + 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', + 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', + 'info_dict': { + 'id': 'zpsc0c3b9fa', + 'ext': 'mp4', + 'timestamp': 1367669341, + 'upload_date': '20130504', + 'uploader': 'rachaneronas', + 'title': 'Tired of Link Building? Try BacklinkMyDomain.com!', + } + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + video_extension = mobj.group('ext') + + webpage = self._download_webpage(url, video_id) + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', + webpage, 'info json') + info = json.loads(info_json) + url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) + return { + 'id': video_id, + 'url': url, + 'uploader': info['username'], + 'timestamp': info['creationDate'], + 'title': info['title'], + 'ext': video_extension, + 'thumbnail': info['thumbUrl'], + } diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py new file mode 100644 index 0000000..3ae985d --- /dev/null +++ b/yt_dlp/extractor/piapro.py @@ -0,0 +1,121 @@ +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + ExtractorError, + parse_duration, + parse_filesize, + str_to_int, + unified_timestamp, + urlencode_postdata, +) + + +class PiaproIE(InfoExtractor): + _NETRC_MACHINE = 'piapro' + _VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?' 
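+ # matches both short work URLs (piapro.jp/t/<id>) and full content URLs (piapro.jp/content/<id>)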
+ _TESTS = [{ + 'url': 'https://piapro.jp/t/NXYR', + 'md5': 'f7c0f760913fb1d44a1c45a4af793909', + 'info_dict': { + 'id': 'NXYR', + 'ext': 'mp3', + 'uploader': 'wowaka', + 'uploader_id': 'wowaka', + 'title': '裏表ラバーズ', + 'description': 'http://www.nicovideo.jp/watch/sm8082467', + 'duration': 189.0, + 'timestamp': 1251785475, + 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', + 'upload_date': '20090901', + 'view_count': int, + } + }, { + 'note': 'There are break lines in description, mandating (?s) flag', + 'url': 'https://piapro.jp/t/9cSd', + 'md5': '952bb6d1e8de95050206408a87790676', + 'info_dict': { + 'id': '9cSd', + 'ext': 'mp3', + 'title': '青に溶けた風船 / 初音ミク', + 'description': 'md5:d395a9bd151447631a5a1460bc7f9132', + 'uploader': 'シアン・キノ', + 'duration': 229.0, + 'timestamp': 1644030039, + 'upload_date': '20220205', + 'view_count': int, + 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', + 'uploader_id': 'cyankino', + } + }, { + 'url': 'https://piapro.jp/content/hcw0z3a169wtemz6', + 'only_matching': True + }, { + 'url': 'https://piapro.jp/t/-SO-', + 'only_matching': True + }] + + _login_status = False + + def _perform_login(self, username, password): + login_ok = True + login_form_strs = { + '_username': username, + '_password': password, + '_remember_me': 'on', + 'login': 'ログイン' + } + self._request_webpage('https://piapro.jp/login/', None) + urlh = self._request_webpage( + 'https://piapro.jp/login/exe', None, + note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(login_form_strs)) + if urlh is False: + login_ok = False + else: + parts = compat_urlparse.urlparse(urlh.url) + if parts.path != '/': + login_ok = False + if not login_ok: + self.report_warning( + 'unable to log in: bad username or password') + self._login_status = login_ok + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID') + if category_id not in ('1', '2', '21', '22', '23', '24', '25'): + raise ExtractorError('The URL does not contain audio.', expected=True) + + str_duration, str_filesize = self._search_regex( + r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B)\)', webpage, 'duration and size', + group=(1, 2), default=(None, None)) + str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False) + + uploader_id, uploader = self._search_regex( + r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 'uploader', + group=(1, 2), default=(None, None)) + content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID') + create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp') + + player_webpage = self._download_webpage( + f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}', + video_id, note='Downloading player webpage') + + return { + 'id': video_id, + 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False), + 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False), + 'uploader': uploader, + 'uploader_id': uploader_id, + 'timestamp': unified_timestamp(create_date, False), + 'duration': parse_duration(str_duration), + 'view_count': str_to_int(str_viewcount), + 'thumbnail': self._html_search_meta('twitter:image', webpage), + 'filesize_approx': parse_filesize(str_filesize.replace(',', '')), + 'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 
'url'), + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/yt_dlp/extractor/piaulizaportal.py b/yt_dlp/extractor/piaulizaportal.py new file mode 100644 index 0000000..1eb6d92 --- /dev/null +++ b/yt_dlp/extractor/piaulizaportal.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + parse_qs, + time_seconds, + traverse_obj, +) + + +class PIAULIZAPortalIE(InfoExtractor): + IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM' + _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _TESTS = [{ + 'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44', + 'info_dict': { + 'id': '005f18b7-e810-5618-cb82-0987c5755d44', + 'title': 'プレゼンテーションプレイヤーのサンプル', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }, { + 'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1', + 'info_dict': { + 'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d', + 'title': '【確認用】視聴サンプルページ(ULIZA)', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0))) + if expires and expires <= time_seconds(): + raise ExtractorError('The link is expired.', video_id=video_id, expected=True) + + webpage = self._download_webpage(url, video_id) + + player_data = self._download_webpage( + self._search_regex( + r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"', + webpage, 'player data url'), + video_id, headers={'Referer': 'https://ulizaportal.jp/'}, + note='Fetching player data', errnote='Unable to fetch player data') + + formats = self._extract_m3u8_formats( + self._search_regex( + r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, + 'm3u8 url', default=None), + video_id, fatal=False) + m3u8_type = self._search_regex( + r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None) + + return { + 'id': video_id, + 'title': self._html_extract_title(webpage), + 'formats': formats, + 'live_status': { + 'video': 'is_live', + 'dvr': 'was_live', # short-term archives + }.get(m3u8_type, 'not_live'), # VOD or long-term archives + } diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py new file mode 100644 index 0000000..d415ba2 --- /dev/null +++ b/yt_dlp/extractor/picarto.py @@ -0,0 +1,152 @@ +import urllib.parse + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + str_or_none, + traverse_obj, +) + + +class PicartoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)' + _TEST = { + 'url': 'https://picarto.tv/Setz', + 'info_dict': { + 'id': 'Setz', + 'ext': 'mp4', + 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'timestamp': int, + 'is_live': True + }, + 'skip': 'Stream is offline', + } + + @classmethod + def suitable(cls, url): + return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + data = self._download_json( + 'https://ptvintern.picarto.tv/ptvapi', channel_id, query={ + 'query': '''{ + channel(name: "%s") { + adult + id + online + stream_name + title + } + 
getLoadBalancerUrl(channel_name: "%s") { + url + } +}''' % (channel_id, channel_id), + })['data'] + metadata = data['channel'] + + if metadata.get('online') == 0: + raise ExtractorError('Stream is offline', expected=True) + title = metadata['title'] + + cdn_data = self._download_json( + data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js', + channel_id, 'Downloading load balancing info') + + formats = [] + for source in (cdn_data.get('source') or []): + source_url = source.get('url') + if not source_url: + continue + source_type = source.get('type') + if source_type == 'html5/application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False)) + elif source_type == 'html5/video/mp4': + formats.append({ + 'url': source_url, + }) + + mature = metadata.get('adult') + if mature is None: + age_limit = None + else: + age_limit = 18 if mature is True else 0 + + return { + 'id': channel_id, + 'title': title.strip(), + 'is_live': True, + 'channel': channel_id, + 'channel_id': metadata.get('id'), + 'channel_url': 'https://picarto.tv/%s' % channel_id, + 'age_limit': age_limit, + 'formats': formats, + } + + +class PicartoVodIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv', + 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca', + 'info_dict': { + 'id': 'ArtofZod_2017.12.12.00.13.23.flv', + 'ext': 'mp4', + 'title': 'ArtofZod_2017.12.12.00.13.23.flv', + 'thumbnail': r're:^https?://.*\.jpg' + }, + 'skip': 'The VOD does not exist', + }, { + 'url': 'https://picarto.tv/ArtofZod/videos/772650', + 'md5': '00067a0889f1f6869cc512e3e79c521b', + 'info_dict': { + 'id': '772650', + 'ext': 'mp4', + 'title': 'Art of Zod - Drawing and Painting', + 'thumbnail': r're:^https?://.*\.jpg', + 'channel': 'ArtofZod', + 'age_limit': 18, + } + }, { + 'url': 'https://picarto.tv/videopopout/Plague', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + data = self._download_json( + 'https://ptvintern.picarto.tv/ptvapi', video_id, query={ + 'query': f'''{{ + video(id: "{video_id}") {{ + id + title + adult + file_name + video_recording_image_url + channel {{ + name + }} + }} +}}''' + })['data']['video'] + + file_name = data['file_name'] + netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc + + formats = self._extract_m3u8_formats( + f'https://{netloc}/stream/hls/{file_name}/index.m3u8', video_id, 'mp4', m3u8_id='hls') + + return { + 'id': video_id, + **traverse_obj(data, { + 'id': ('id', {str_or_none}), + 'title': ('title', {str}), + 'thumbnail': 'video_recording_image_url', + 'channel': ('channel', 'name', {str}), + 'age_limit': ('adult', {lambda x: 18 if x else 0}), + }), + 'formats': formats, + } diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py new file mode 100644 index 0000000..97a9bf5 --- /dev/null +++ b/yt_dlp/extractor/piksel.py @@ -0,0 +1,174 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + dict_get, + ExtractorError, + int_or_none, + join_nonempty, + parse_iso8601, + traverse_obj, + try_get, + unescapeHTML, + urljoin, +) + + +class PikselIE(InfoExtractor): + _VALID_URL = r'''(?x)https?:// + (?: + (?: + player\. 
+ (?: + olympusattelecom| + vibebyvista + )| + (?:api|player)\.multicastmedia| + (?:api-ovp|player)\.piksel + )\.com| + (?: + mz-edge\.stream\.co| + movie-s\.nhk\.or + )\.jp| + vidego\.baltimorecity\.gov + )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)''' + _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)'] + _TESTS = [ + { + 'url': 'http://player.piksel.com/v/ums2867l', + 'md5': '34e34c8d89dc2559976a6079db531e85', + 'info_dict': { + 'id': 'ums2867l', + 'ext': 'mp4', + 'title': 'GX-005 with Caption', + 'timestamp': 1481335659, + 'upload_date': '20161210' + } + }, + { + # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al + 'url': 'https://player.piksel.com/v/v80kqp41', + 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', + 'info_dict': { + 'id': 'v80kqp41', + 'ext': 'mp4', + 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', + 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.', + 'timestamp': 1486171129, + 'upload_date': '20170204' + } + }, + { + # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/ + 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477', + 'only_matching': True, + } + ] + + def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True): + url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5') + response = traverse_obj( + self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {} + failure = traverse_obj(response, ('failure', 'reason')) if response else 'Empty response from API' + if failure: + if fatal: + raise ExtractorError(failure, expected=True) + self.report_warning(failure) + return response + + def _real_extract(self, url): + ref_id, display_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, display_id) + app_token = self._search_regex([ + r'clientAPI\s*:\s*"([^"]+)"', + r'data-de-api-key\s*=\s*"([^"]+)"' + ], webpage, 'app token') + query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} + program = self._call_api( + app_token, 'program', display_id, query, url)['WsProgramResponse']['program'] + video_id = program['uuid'] + video_data = program['asset'] + title = video_data['title'] + asset_type = dict_get(video_data, ['assetType', 'asset_type']) + + formats = [] + + def process_asset_file(asset_file): + if not asset_file: + return + # TODO: extract rtmp formats + http_url = asset_file.get('http_url') + if not http_url: + return + tbr = None + vbr = int_or_none(asset_file.get('videoBitrate'), 1024) + abr = int_or_none(asset_file.get('audioBitrate'), 1024) + if asset_type == 'video': + tbr = vbr + abr + elif asset_type == 'audio': + tbr = abr + + formats.append({ + 'format_id': join_nonempty('http', tbr), + 'url': unescapeHTML(http_url), + 'vbr': vbr, + 'abr': abr, + 'width': int_or_none(asset_file.get('videoWidth')), + 'height': int_or_none(asset_file.get('videoHeight')), + 'filesize': int_or_none(asset_file.get('filesize')), + 'tbr': tbr, + }) + + def process_asset_files(asset_files): + for asset_file in (asset_files or []): + process_asset_file(asset_file) + + process_asset_files(video_data.get('assetFiles')) + process_asset_file(video_data.get('referenceFile')) + if not formats: + 
asset_id = video_data.get('assetid') or program.get('assetid') + if asset_id: + process_asset_files(try_get(self._call_api( + app_token, 'asset_file', display_id, { + 'assetid': asset_id, + }, url, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) + + m3u8_url = dict_get(video_data, [ + 'm3u8iPadURL', + 'ipadM3u8Url', + 'm3u8AndroidURL', + 'm3u8iPhoneURL', + 'iphoneM3u8Url']) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) + if smil_url: + transform_source = None + if ref_id == 'nhkworld': + # TODO: figure out if this is something to be fixed in urljoin, + # _parse_smil_formats or keep it here + transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"') + formats.extend(self._extract_smil_formats( + re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id, + transform_source=transform_source, fatal=False)) + + subtitles = {} + for caption in video_data.get('captions', []): + caption_url = caption.get('url') + if caption_url: + subtitles.setdefault(caption.get('locale', 'en'), []).append({ + 'url': caption_url}) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailUrl'), + 'timestamp': parse_iso8601(video_data.get('dateadd')), + 'formats': formats, + 'subtitles': subtitles, + '_format_sort_fields': ('tbr', ), # Incomplete resolution information + } diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py new file mode 100644 index 0000000..e4e1caa --- /dev/null +++ b/yt_dlp/extractor/pinkbike.py @@ -0,0 +1,93 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_end, + remove_start, + str_to_int, + unified_strdate, +) + + +class PinkbikeIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.pinkbike.com/video/402811/', + 'md5': '4814b8ca7651034cd87e3361d5c2155a', + 'info_dict': { + 'id': '402811', + 'ext': 'mp4', + 'title': 'Brandon Semenuk - RAW 100', + 'description': 'Official release: www.redbull.ca/rupertwalker', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 100, + 'upload_date': '20150406', + 'uploader': 'revelco', + 'location': 'Victoria, British Columbia, Canada', + 'view_count': int, + 'comment_count': int, + } + }, { + 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://www.pinkbike.com/video/%s' % video_id, video_id) + + formats = [] + for _, format_id, src in re.findall( + r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage): + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None)) + formats.append({ + 'url': src, + 'format_id': format_id, + 'height': height, + }) + + title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike') + description = self._html_search_regex( + r'(?s)id="media-description"[^>]*>(.+?)<', + webpage, 'description', default=None) or remove_start( + self._og_search_description(webpage), title + '. 
') + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration')) + + uploader = self._search_regex( + r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage, + 'uploader', fatal=False) + upload_date = unified_strdate(self._search_regex( + r'class="fullTime"[^>]+title="([^"]+)"', + webpage, 'upload date', fatal=False)) + + location = self._html_search_regex( + r'(?s)<dt>Location</dt>\s*<dd>(.+?)<', + webpage, 'location', fatal=False) + + def extract_count(webpage, label): + return str_to_int(self._search_regex( + r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label, + webpage, label, fatal=False)) + + view_count = extract_count(webpage, 'Views') + comment_count = extract_count(webpage, 'Comments') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'upload_date': upload_date, + 'uploader': uploader, + 'location': location, + 'view_count': view_count, + 'comment_count': comment_count, + 'formats': formats + } diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py new file mode 100644 index 0000000..8361fbb --- /dev/null +++ b/yt_dlp/extractor/pinterest.py @@ -0,0 +1,248 @@ +import json + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + str_or_none, + strip_or_none, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class PinterestBaseIE(InfoExtractor): + _VALID_URL_BASE = r'''(?x) + https?://(?:[^/]+\.)?pinterest\.(?: + com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx| + dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu| + co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)''' + + def _call_api(self, resource, video_id, options): + return self._download_json( + 'https://www.pinterest.com/resource/%sResource/get/' % resource, + video_id, 'Download %s JSON metadata' % resource, query={ + 'data': json.dumps({'options': options}) + })['resource_response'] + + def _extract_video(self, data, extract_formats=True): + video_id = data['id'] + thumbnails = [] + images = data.get('images') + if isinstance(images, dict): + for thumbnail_id, thumbnail in images.items(): + if not isinstance(thumbnail, dict): + continue + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + info = { + 'title': strip_or_none(traverse_obj(data, 'title', 'grid_title', default='')), + 'description': traverse_obj(data, 'seo_description', 'description'), + 'timestamp': unified_timestamp(data.get('created_at')), + 'thumbnails': thumbnails, + 'uploader': traverse_obj(data, ('closeup_attribution', 'full_name')), + 'uploader_id': str_or_none(traverse_obj(data, ('closeup_attribution', 'id'))), + 'repost_count': int_or_none(data.get('repin_count')), + 'comment_count': int_or_none(data.get('comment_count')), + 'categories': traverse_obj(data, ('pin_join', 'visual_annotation'), expected_type=list), + 'tags': traverse_obj(data, 'hashtags', expected_type=list), + } + + urls = [] + formats = [] + duration = None + domain = data.get('domain', '') + if domain.lower() != 'uploaded by user' and traverse_obj(data, ('embed', 'src')): + if not info['title']: + info['title'] = None + return { + '_type': 'url_transparent', + 'url': 
data['embed']['src'], + **info, + } + + elif extract_formats: + video_list = traverse_obj( + data, ('videos', 'video_list'), + ('story_pin_data', 'pages', ..., 'blocks', ..., 'video', 'video_list'), + expected_type=dict, get_all=False, default={}) + for format_id, format_dict in video_list.items(): + if not isinstance(format_dict, dict): + continue + format_url = url_or_none(format_dict.get('url')) + if not format_url or format_url in urls: + continue + urls.append(format_url) + duration = float_or_none(format_dict.get('duration'), scale=1000) + ext = determine_ext(format_url) + if 'hls' in format_id.lower() or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'width': int_or_none(format_dict.get('width')), + 'height': int_or_none(format_dict.get('height')), + 'duration': duration, + }) + + return { + 'id': video_id, + 'formats': formats, + 'duration': duration, + 'webpage_url': f'https://www.pinterest.com/pin/{video_id}/', + 'extractor_key': PinterestIE.ie_key(), + 'extractor': PinterestIE.IE_NAME, + **info, + } + + +class PinterestIE(PinterestBaseIE): + _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE + _TESTS = [{ + # formats found in data['videos'] + 'url': 'https://www.pinterest.com/pin/664281013778109217/', + 'md5': '6550c2af85d6d9f3fe3b88954d1577fc', + 'info_dict': { + 'id': '664281013778109217', + 'ext': 'mp4', + 'title': 'Origami', + 'description': 'md5:e29801cab7d741ea8c741bc50c8d00ab', + 'duration': 57.7, + 'timestamp': 1593073622, + 'upload_date': '20200625', + 'repost_count': int, + 'comment_count': int, + 'categories': list, + 'tags': list, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + }, { + # formats found in data['story_pin_data'] + 'url': 'https://www.pinterest.com/pin/1084663891475263837/', + 'md5': '069ac19919ab9e1e13fa60de46290b03', + 'info_dict': { + 'id': '1084663891475263837', + 'ext': 'mp4', + 'title': 'Gadget, Cool products, Amazon product, technology, Kitchen gadgets', + 'description': 'md5:d0a4b6ae996ff0c6eed83bc869598d13', + 'uploader': 'CoolCrazyGadgets', + 'uploader_id': '1084664028912989237', + 'upload_date': '20211003', + 'timestamp': 1633246654.0, + 'duration': 14.9, + 'comment_count': int, + 'repost_count': int, + 'categories': 'count:9', + 'tags': list, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + }, { + # vimeo.com embed + 'url': 'https://www.pinterest.ca/pin/441282463481903715/', + 'info_dict': { + 'id': '111691128', + 'ext': 'mp4', + 'title': 'Tonite Let\'s All Make Love In London (1967)', + 'description': 'md5:8190f37b3926807809ec57ec21aa77b2', + 'uploader': 'Vimeo', + 'uploader_id': '473792960706651251', + 'upload_date': '20180120', + 'timestamp': 1516409040, + 'duration': 3404, + 'comment_count': int, + 'repost_count': int, + 'categories': 'count:9', + 'tags': [], + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'uploader_url': 'https://vimeo.com/willardandrade', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + 'url': 'https://co.pinterest.com/pin/824721750502199491/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._call_api( + 'Pin', video_id, { + 'field_set_key': 'unauth_react_main_pin', + 'id': video_id, + })['data'] + return self._extract_video(data) + + +class PinterestCollectionIE(PinterestBaseIE): + _VALID_URL = 
r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE + _TESTS = [{ + 'url': 'https://www.pinterest.ca/mashal0407/cool-diys/', + 'info_dict': { + 'id': '585890301462791043', + 'title': 'cool diys', + }, + 'playlist_count': 8, + }, { + 'url': 'https://www.pinterest.ca/fudohub/videos/', + 'info_dict': { + 'id': '682858430939307450', + 'title': 'VIDEOS', + }, + 'playlist_mincount': 365, + 'skip': 'Test with extract_formats=False', + }] + + @classmethod + def suitable(cls, url): + return False if PinterestIE.suitable(url) else super( + PinterestCollectionIE, cls).suitable(url) + + def _real_extract(self, url): + username, slug = self._match_valid_url(url).groups() + board = self._call_api( + 'Board', slug, { + 'slug': slug, + 'username': username + })['data'] + board_id = board['id'] + options = { + 'board_id': board_id, + 'page_size': 250, + } + bookmark = None + entries = [] + while True: + if bookmark: + options['bookmarks'] = [bookmark] + board_feed = self._call_api('BoardFeed', board_id, options) + for item in (board_feed.get('data') or []): + if not isinstance(item, dict) or item.get('type') != 'pin': + continue + video_id = item.get('id') + if video_id: + # Some pins may not be available anonymously via pin URL + # video = self._extract_video(item, extract_formats=False) + # video.update({ + # '_type': 'url_transparent', + # 'url': 'https://www.pinterest.com/pin/%s/' % video_id, + # }) + # entries.append(video) + entries.append(self._extract_video(item)) + bookmark = board_feed.get('bookmark') + if not bookmark: + break + return self.playlist_result( + entries, playlist_id=board_id, playlist_title=board.get('name')) diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py new file mode 100644 index 0000000..850c6f2 --- /dev/null +++ b/yt_dlp/extractor/pixivsketch.py @@ -0,0 +1,118 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, + unified_timestamp, +) + + +class PixivSketchBaseIE(InfoExtractor): + def _call_api(self, video_id, path, referer, note='Downloading JSON metadata'): + response = self._download_json(f'https://sketch.pixiv.net/api/{path}', video_id, note=note, headers={ + 'Referer': referer, + 'X-Requested-With': referer, + }) + errors = traverse_obj(response, ('errors', ..., 'message')) + if errors: + raise ExtractorError(' '.join(f'{e}.' for e in errors)) + return response.get('data') or {} + + +class PixivSketchIE(PixivSketchBaseIE): + IE_NAME = 'pixiv:sketch' + _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<uploader_id>[a-zA-Z0-9_-]+)/lives/(?P<id>\d+)/?' 
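+ # e.g. https://sketch.pixiv.net/@<uploader_id>/lives/<live id>; the user name and the numeric live ID are captured separately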
+ _TESTS = [{ + 'url': 'https://sketch.pixiv.net/@nuhutya/lives/3654620468641830507', + 'info_dict': { + 'id': '7370666691623196569', + 'title': 'まにあえクリスマス!', + 'uploader': 'ぬふちゃ', + 'uploader_id': 'nuhutya', + 'channel_id': '9844815', + 'age_limit': 0, + 'timestamp': 1640351536, + }, + 'skip': True, + }, { + # these two (age_limit > 0) require you to log in on the website, but logging in is not actually required for download + 'url': 'https://sketch.pixiv.net/@namahyou/lives/4393103321546851377', + 'info_dict': { + 'id': '4907995960957946943', + 'title': 'クリスマスなんて知らん🖕', + 'uploader': 'すゃもり', + 'uploader_id': 'suya2mori2', + 'channel_id': '31169300', + 'age_limit': 15, + 'timestamp': 1640347640, + }, + 'skip': True, + }, { + 'url': 'https://sketch.pixiv.net/@8aki/lives/3553803162487249670', + 'info_dict': { + 'id': '1593420639479156945', + 'title': 'おまけ本作業(リョナ有)', + 'uploader': 'おぶい / Obui', + 'uploader_id': 'oving', + 'channel_id': '17606', + 'age_limit': 18, + 'timestamp': 1640330263, + }, + 'skip': True, + }] + + def _real_extract(self, url): + video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') + data = self._call_api(video_id, f'lives/{video_id}.json', url) + + if not traverse_obj(data, 'is_broadcasting'): + raise ExtractorError(f'This live is offline. Use https://sketch.pixiv.net/@{uploader_id} for ongoing live.', expected=True) + + m3u8_url = traverse_obj(data, ('owner', 'hls_movie', 'url')) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + + return { + 'id': video_id, + 'title': data.get('name'), + 'formats': formats, + 'uploader': traverse_obj(data, ('user', 'name'), ('owner', 'user', 'name')), + 'uploader_id': traverse_obj(data, ('user', 'unique_name'), ('owner', 'user', 'unique_name')), + 'channel_id': str(traverse_obj(data, ('user', 'pixiv_user_id'), ('owner', 'user', 'pixiv_user_id'))), + 'age_limit': 18 if data.get('is_r18') else 15 if data.get('is_r15') else 0, + 'timestamp': unified_timestamp(data.get('created_at')), + 'is_live': True + } + + +class PixivSketchUserIE(PixivSketchBaseIE): + IE_NAME = 'pixiv:sketch:user' + _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<id>[a-zA-Z0-9_-]+)/?' 
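+ # bare user page; suitable() below defers /lives/ URLs to PixivSketchIE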
+ _TESTS = [{ + 'url': 'https://sketch.pixiv.net/@nuhutya', + 'only_matching': True, + }, { + 'url': 'https://sketch.pixiv.net/@namahyou', + 'only_matching': True, + }, { + 'url': 'https://sketch.pixiv.net/@8aki', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return super(PixivSketchUserIE, cls).suitable(url) and not PixivSketchIE.suitable(url) + + def _real_extract(self, url): + user_id = self._match_id(url) + data = self._call_api(user_id, f'lives/users/@{user_id}.json', url) + + if not traverse_obj(data, 'is_broadcasting'): + try: + self._call_api(user_id, 'users/current.json', url, 'Investigating reason for request failure') + except ExtractorError as ex: + if ex.cause and ex.cause.code == 401: + self.raise_login_required(f'Please log in, or use direct link like https://sketch.pixiv.net/@{user_id}/1234567890', method='cookies') + raise ExtractorError('This user is offline', expected=True) + + return self.url_result(f'https://sketch.pixiv.net/@{user_id}/lives/{data["id"]}') diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py new file mode 100644 index 0000000..d67f600 --- /dev/null +++ b/yt_dlp/extractor/pladform.py @@ -0,0 +1,135 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + parse_qs, + xpath_text, + qualities, +) + + +class PladformIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?: + out\.pladform\.ru/player| + static\.pladform\.ru/player\.swf + ) + \?.*\bvideoid=| + video\.pladform\.ru/catalog/video/videoid/ + ) + (?P<id>\d+) + ''' + _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1'] + _TESTS = [{ + 'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282', + 'info_dict': { + 'id': '6216d548e755edae6e8280667d774791', + 'ext': 'mp4', + 'timestamp': 1406117012, + 'title': 'Гарик Мартиросян и Гарик Харламов - Кастинг на концерт ко Дню милиции', + 'age_limit': 0, + 'upload_date': '20140723', + 'thumbnail': str, + 'view_count': int, + 'description': str, + 'uploader_id': '12082', + 'uploader': 'Comedy Club', + 'duration': 367, + }, + 'expected_warnings': ['HTTP Error 404: Not Found'] + }, { + 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0', + 'md5': '53362fac3a27352da20fa2803cc5cd6f', + 'info_dict': { + 'id': '3777899', + 'ext': 'mp4', + 'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко', + 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3190, + }, + }, { + 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0', + 'only_matching': True, + }, { + 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + qs = parse_qs(url) + pl = qs.get('pl', ['1'])[0] + + video = self._download_xml( + 'http://out.pladform.ru/getVideo', video_id, query={ + 'pl': pl, + 'videoid': video_id, + }, fatal=False) + + def fail(text): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, text), + expected=True) + + if not video: + targetUrl = self._request_webpage(url, video_id, note='Resolving final URL').url + if targetUrl == url: + raise ExtractorError('Can\'t parse page') + return self.url_result(targetUrl) + + if video.tag == 'error': + fail(video.text) + + quality = qualities(('ld', 'sd', 'hd')) + + formats = 
[] + for src in video.findall('./src'): + if src is None: + continue + format_url = src.text + if not format_url: + continue + if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src.text, + 'format_id': src.get('quality'), + 'quality': quality(src.get('quality')), + }) + + if not formats: + error = xpath_text(video, './cap', 'error', default=None) + if error: + fail(error) + + webpage = self._download_webpage( + 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, + video_id) + + title = self._og_search_title(webpage, fatal=False) or xpath_text( + video, './/title', 'title', fatal=True) + description = self._search_regex( + r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) or xpath_text( + video, './/cover', 'cover') + + duration = int_or_none(xpath_text(video, './/time', 'duration')) + age_limit = int_or_none(xpath_text(video, './/age18', 'age limit')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py new file mode 100644 index 0000000..a4b612a --- /dev/null +++ b/yt_dlp/extractor/planetmarathi.py @@ -0,0 +1,71 @@ +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class PlanetMarathiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)' + _TESTS = [{ + 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', + 'playlist_mincount': 2, + 'info_dict': { + 'id': 'ek-unad-divas', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas', + 'ext': 'mp4', + 'title': 'ek unad divas', + 'alt_title': 'चित्रपट', + 'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881', + 'episode_number': 1, + 'duration': 5539, + 'upload_date': '20210829', + }, + }] # Trailer skipped + }, { + 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1', + 'playlist_mincount': 10, + 'info_dict': { + 'id': 'baap-beep-baap-season-1', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1', + 'ext': 'mp4', + 'title': 'Manohar Kanhere', + 'alt_title': 'मनोहर कान्हेरे', + 'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6', + 'season_number': 1, + 'episode_number': 1, + 'duration': 29, + 'upload_date': '20210829', + }, + }] # Trailers, Episodes, other Character profiles skipped + }] + + def _real_extract(self, url): + id = self._match_id(url) + entries = [] + json_data = self._download_json(f'https://www.planetmarathi.com/api/v1/titles/{id}/assets', id)['assets'] + for asset in json_data: + asset_title = asset['mediaAssetName']['en'] + if asset_title == 'Movie': + asset_title = id.replace('-', ' ') + asset_id = f'{asset["sk"]}_{id}'.replace('#', '-') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) + entries.append({ + 'id': asset_id, + 'title': asset_title, + 'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']), + 'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']), + 'season_number': asset.get('mediaAssetSeason'), + 'episode_number': asset.get('mediaAssetIndexForAssetType'), + 
'duration': asset.get('mediaAssetDurationInSeconds'), + 'upload_date': unified_strdate(asset.get('created')), + 'formats': formats, + 'subtitles': subtitles, + }) + return self.playlist_result(entries, playlist_id=id) diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py new file mode 100644 index 0000000..166b98c --- /dev/null +++ b/yt_dlp/extractor/platzi.py @@ -0,0 +1,213 @@ +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_str, +) +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_or_none, + try_get, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class PlatziBaseIE(InfoExtractor): + _LOGIN_URL = 'https://platzi.com/login/' + _NETRC_MACHINE = 'platzi' + + def _perform_login(self, username, password): + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username, + 'password': password, + }) + + urlh = self._request_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + # login succeeded + if 'platzi.com/login' not in urlh.url: + return + + login_error = self._webpage_read_content( + urlh, self._LOGIN_URL, None, 'Downloading login error page') + + login = self._parse_json( + self._search_regex( + r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'), + None) + + for kind in ('error', 'password', 'nonFields'): + error = str_or_none(login.get('%sError' % kind)) + if error: + raise ExtractorError( + 'Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + +class PlatziIE(PlatziBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P<id>\d+)-[^/?\#&]+ + ''' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + lecture_id = self._match_id(url) + + webpage = self._download_webpage(url, lecture_id) + + data = self._parse_json( + self._search_regex( + # client_data may contain "};" so that we have to try more + # strict regex first + (r'client_data\s*=\s*({.+?})\s*;\s*\n', + r'client_data\s*=\s*({.+?})\s*;'), + webpage, 'client data'), + lecture_id) + + material = data['initialState']['material'] + desc = material['description'] + title = desc['title'] + + formats = [] + for server_id, server in material['videos'].items(): + if not isinstance(server, dict): + continue + for format_id in ('hls', 'dash'): + format_url = url_or_none(server.get(format_id)) + if not format_url: + continue + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, lecture_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, + note='Downloading %s m3u8 information' % 
server_id, + fatal=False)) + elif format_id == 'dash': + formats.extend(self._extract_mpd_formats( + format_url, lecture_id, mpd_id=format_id, + note='Downloading %s MPD manifest' % server_id, + fatal=False)) + + content = str_or_none(desc.get('content')) + description = (clean_html(compat_b64decode(content).decode('utf-8')) + if content else None) + duration = int_or_none(material.get('duration'), invscale=60) + + return { + 'id': lecture_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + } + + +class PlatziCourseIE(PlatziBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/(?P<id>[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://platzi.com/clases/next-js/', + 'info_dict': { + 'id': '1311', + 'title': 'Curso de Next.js', + }, + 'playlist_count': 22, + }, { + 'url': 'https://courses.platzi.com/classes/communication-codestream/', + 'info_dict': { + 'id': '1367', + 'title': 'Codestream Course', + }, + 'playlist_count': 14, + }] + + @classmethod + def suitable(cls, url): + return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_name = self._match_id(url) + + webpage = self._download_webpage(url, course_name) + + props = self._parse_json( + self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), + course_name)['initialProps'] + + entries = [] + for chapter_num, chapter in enumerate(props['concepts'], 1): + if not isinstance(chapter, dict): + continue + materials = chapter.get('materials') + if not materials or not isinstance(materials, list): + continue + chapter_title = chapter.get('title') + chapter_id = str_or_none(chapter.get('id')) + for material in materials: + if not isinstance(material, dict): + continue + if material.get('material_type') != 'video': + continue + video_url = urljoin(url, material.get('url')) + if not video_url: + continue + entries.append({ + '_type': 'url_transparent', + 'url': video_url, + 'title': str_or_none(material.get('name')), + 'id': str_or_none(material.get('id')), + 'ie_key': PlatziIE.ie_key(), + 'chapter': chapter_title, + 'chapter_number': chapter_num, + 'chapter_id': chapter_id, + }) + + course_id = compat_str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], compat_str) + + return self.playlist_result(entries, course_id, course_title) diff --git a/yt_dlp/extractor/playplustv.py b/yt_dlp/extractor/playplustv.py new file mode 100644 index 0000000..a4439c8 --- /dev/null +++ b/yt_dlp/extractor/playplustv.py @@ -0,0 +1,100 @@ +import json + +from .common import InfoExtractor +from ..networking import PUTRequest +from ..networking.exceptions import HTTPError +from ..utils import ExtractorError, clean_html, int_or_none + + +class PlayPlusTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' + _TEST = { + 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', + 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', + 'info_dict': { + 'id': 'db8d274a5163424e967f35a30ddafb8e', + 'ext': 'mp4', + 'title': 'Capítulo 179 - Final', + 'description': 'md5:01085d62d8033a1e34121d3c3cabc838', + 'timestamp': 1529992740, + 'upload_date': '20180626', + }, + 'skip': 'Requires account credential', + } + _NETRC_MACHINE = 'playplustv' + _GEO_COUNTRIES = ['BR'] + _token = None + _profile_id = None + + def _call_api(self, resource, 
video_id=None, query=None): + return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={ + 'Authorization': 'Bearer ' + self._token, + }, query=query) + + def _perform_login(self, username, password): + req = PUTRequest( + 'https://api.playplus.tv/api/web/login', json.dumps({ + 'email': username, + 'password': password, + }).encode(), { + 'Content-Type': 'application/json; charset=utf-8', + }) + + try: + self._token = self._download_json(req, None)['token'] + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + raise ExtractorError(self._parse_json( + e.cause.response.read(), None)['errorMessage'], expected=True) + raise + + self._profile = self._call_api('Profiles')['list'][0]['_id'] + + def _real_initialize(self): + if not self._token: + self.raise_login_required(method='password') + + def _real_extract(self, url): + project_id, media_id = self._match_valid_url(url).groups() + media = self._call_api( + 'Media', media_id, { + 'profileId': self._profile, + 'projectId': project_id, + 'mediaId': media_id, + })['obj'] + title = media['title'] + + formats = [] + for f in media.get('files', []): + f_url = f.get('url') + if not f_url: + continue + file_info = f.get('fileInfo') or {} + formats.append({ + 'url': f_url, + 'width': int_or_none(file_info.get('width')), + 'height': int_or_none(file_info.get('height')), + }) + + thumbnails = [] + for thumb in media.get('thumbs', []): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'url': thumb_url, + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + + return { + 'id': media_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': clean_html(media.get('description')) or media.get('shortDescription'), + 'timestamp': int_or_none(media.get('publishDate'), 1000), + 'view_count': int_or_none(media.get('numberOfViews')), + 'comment_count': int_or_none(media.get('numberOfComments')), + 'tags': media.get('tags'), + } diff --git a/yt_dlp/extractor/playsuisse.py b/yt_dlp/extractor/playsuisse.py new file mode 100644 index 0000000..7c5cad1 --- /dev/null +++ b/yt_dlp/extractor/playsuisse.py @@ -0,0 +1,234 @@ +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + parse_qs, + traverse_obj, + update_url_query, + urlencode_postdata, +) + + +class PlaySuisseIE(InfoExtractor): + _NETRC_MACHINE = 'playsuisse' + _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)' + _TESTS = [ + { + # Old URL + 'url': 'https://www.playsuisse.ch/watch/763211/0', + 'only_matching': True, + }, + { + # episode in a series + 'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211', + 'md5': '82df2a470b2dfa60c2d33772a8a60cf8', + 'info_dict': { + 'id': '763211', + 'ext': 'mp4', + 'title': 'Knochen', + 'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8', + 'duration': 3344, + 'series': 'Wilder', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Knochen', + 'episode_number': 1, + 'thumbnail': 're:https://playsuisse-img.akamaized.net/', + } + }, { + # film + 'url': 'https://www.playsuisse.ch/watch/808675', + 'md5': '818b94c1d2d7c4beef953f12cb8f3e75', + 'info_dict': { + 'id': '808675', + 'ext': 'mp4', + 'title': 'Der Läufer', + 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd', + 'duration': 5280, + 'thumbnail': 're:https://playsuisse-img.akamaized.net/', + } + }, { + # series 
(treated as a playlist) + 'url': 'https://www.playsuisse.ch/detail/1115687', + 'info_dict': { + 'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3', + 'id': '1115687', + 'series': 'They all came out to Montreux', + 'title': 'They all came out to Montreux', + }, + 'playlist': [{ + 'info_dict': { + 'description': 'md5:f2462744834b959a31adc6292380cda2', + 'duration': 3180, + 'episode': 'Folge 1', + 'episode_number': 1, + 'id': '1112663', + 'season': 'Season 1', + 'season_number': 1, + 'series': 'They all came out to Montreux', + 'thumbnail': 're:https://playsuisse-img.akamaized.net/', + 'title': 'Folge 1', + 'ext': 'mp4' + }, + }, { + 'info_dict': { + 'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27', + 'duration': 2935, + 'episode': 'Folge 2', + 'episode_number': 2, + 'id': '1112661', + 'season': 'Season 1', + 'season_number': 1, + 'series': 'They all came out to Montreux', + 'thumbnail': 're:https://playsuisse-img.akamaized.net/', + 'title': 'Folge 2', + 'ext': 'mp4' + }, + }, { + 'info_dict': { + 'description': 'md5:14a93a3356b2492a8f786ab2227ef602', + 'duration': 2994, + 'episode': 'Folge 3', + 'episode_number': 3, + 'id': '1112664', + 'season': 'Season 1', + 'season_number': 1, + 'series': 'They all came out to Montreux', + 'thumbnail': 're:https://playsuisse-img.akamaized.net/', + 'title': 'Folge 3', + 'ext': 'mp4' + } + }], + } + ] + + _GRAPHQL_QUERY = ''' + query AssetWatch($assetId: ID!) { + assetV2(id: $assetId) { + ...Asset + episodes { + ...Asset + } + } + } + fragment Asset on AssetV2 { + id + name + description + duration + episodeNumber + seasonNumber + seriesName + medias { + type + url + } + thumbnail16x9 { + ...ImageDetails + } + thumbnail2x3 { + ...ImageDetails + } + thumbnail16x9WithTitle { + ...ImageDetails + } + thumbnail2x3WithTitle { + ...ImageDetails + } + } + fragment ImageDetails on AssetImage { + id + url + }''' + _LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com' + _LOGIN_PATH = 'B2C_1A__SignInV2' + _ID_TOKEN = None + + def _perform_login(self, username, password): + login_page = self._download_webpage( + 'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page', + query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'}) + settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None) + + csrf_token = settings['csrf'] + query = {'tx': settings['transId'], 'p': self._LOGIN_PATH} + + status = traverse_obj(self._download_json( + f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in', + query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({ + 'request_type': 'RESPONSE', + 'signInName': username, + 'password': password + }), expected_status=400), ('status', {int_or_none})) + if status == 400: + raise ExtractorError('Invalid username or password', expected=True) + + urlh = self._request_webpage( + f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed', + None, 'Downloading ID token', query={ + 'rememberMe': 'false', + 'csrf_token': csrf_token, + **query, + 'diags': '', + }) + + self._ID_TOKEN = traverse_obj(parse_qs(urlh.url), ('id_token', 0)) + if not self._ID_TOKEN: + raise ExtractorError('Login failed') + + def _get_media_data(self, media_id): + # NOTE In the web app, the "locale" header is used to switch between languages, + # However this doesn't seem to take effect when passing the header here. 
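+        # The AssetWatch GraphQL query below returns the asset metadata together
+        # with its episode list (for series), so a single request covers both
+        # the film and the series cases handled in _real_extract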
+ response = self._download_json( + 'https://www.playsuisse.ch/api/graphql', + media_id, data=json.dumps({ + 'operationName': 'AssetWatch', + 'query': self._GRAPHQL_QUERY, + 'variables': {'assetId': media_id} + }).encode('utf-8'), + headers={'Content-Type': 'application/json', 'locale': 'de'}) + + return response['data']['assetV2'] + + def _real_extract(self, url): + if not self._ID_TOKEN: + self.raise_login_required(method='password') + + media_id = self._match_id(url) + media_data = self._get_media_data(media_id) + info = self._extract_single(media_data) + if media_data.get('episodes'): + info.update({ + '_type': 'playlist', + 'entries': map(self._extract_single, media_data['episodes']), + }) + return info + + def _extract_single(self, media_data): + thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail')) + + formats, subtitles = [], {} + for media in traverse_obj(media_data, 'medias', default=[]): + if not media.get('url') or media.get('type') != 'HLS': + continue + f, subs = self._extract_m3u8_formats_and_subtitles( + update_url_query(media['url'], {'id_token': self._ID_TOKEN}), + media_data['id'], 'mp4', m3u8_id='HLS', fatal=False) + formats.extend(f) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': media_data['id'], + 'title': media_data.get('name'), + 'description': media_data.get('description'), + 'thumbnails': thumbnails, + 'duration': int_or_none(media_data.get('duration')), + 'formats': formats, + 'subtitles': subtitles, + 'series': media_data.get('seriesName'), + 'season_number': int_or_none(media_data.get('seasonNumber')), + 'episode': media_data.get('name') if media_data.get('episodeNumber') else None, + 'episode_number': int_or_none(media_data.get('episodeNumber')), + } diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py new file mode 100644 index 0000000..c418f88 --- /dev/null +++ b/yt_dlp/extractor/playtvak.py @@ -0,0 +1,185 @@ +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, + compat_urllib_parse_urlencode, +) +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + qualities, +) + + +class PlaytvakIE(InfoExtractor): + IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz' + _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)' + _TESTS = [{ + 'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko', + 'md5': '4525ae312c324b4be2f4603cc78ceb4a', + 'info_dict': { + 'id': 'A150730_150323_hodinovy-manzel_kuko', + 'ext': 'mp4', + 'title': 'Vyžeňte vosy a sršně ze zahrady', + 'description': 'md5:4436e61b7df227a093778efb7e373571', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 279, + 'timestamp': 1438732860, + 'upload_date': '20150805', + 'is_live': False, + } + }, { # live video test + 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat', + 'info_dict': { + 'id': 'A150624_164934_planespotting_cat', + 'ext': 'flv', + 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', + 'is_live': True, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }, { # another live stream, this one without Misc.videoFLV + 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap', + 'info_dict': { + 'id': 
'A151218_145728_hlavni-nadrazi_plap', + 'ext': 'flv', + 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }, { # idnes.cz + 'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku', + 'md5': '819832ba33cd7016e58a6658577fe289', + 'info_dict': { + 'id': 'A150809_104116_domaci_pku', + 'ext': 'mp4', + 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se', + 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 39, + 'timestamp': 1438969140, + 'upload_date': '20150807', + 'is_live': False, + } + }, { # lidovky.cz + 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE', + 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8', + 'info_dict': { + 'id': 'A150808_214044_ln-video_ELE', + 'ext': 'mp4', + 'title': 'Táhni! Demonstrace proti imigrantům budila emoce', + 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1439052180, + 'upload_date': '20150808', + 'is_live': False, + } + }, { # metro.cz + 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row', + 'md5': '84fc1deedcac37b7d4a6ccae7c716668', + 'info_dict': { + 'id': 'A141111_173251_metro-extra_row', + 'ext': 'mp4', + 'title': 'Recesisté udělali z billboardu kolotoč', + 'description': 'md5:7369926049588c3989a66c9c1a043c4c', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1415725500, + 'upload_date': '20141111', + 'is_live': False, + } + }, { + 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + info_url = self._html_search_regex( + r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') + + parsed_url = compat_urlparse.urlparse(info_url) + + qs = compat_urlparse.parse_qs(parsed_url.query) + qs.update({ + 'reklama': ['0'], + 'type': ['js'], + }) + + info_url = compat_urlparse.urlunparse( + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + + json_info = self._download_json( + info_url, video_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) + + item = None + for i in json_info['items']: + if i.get('type') == 'video' or i.get('type') == 'stream': + item = i + break + if not item: + raise ExtractorError('No suitable stream found') + + quality = qualities(('low', 'middle', 'high')) + + formats = [] + for fmt in item['video']: + video_url = fmt.get('file') + if not video_url: + continue + + format_ = fmt['format'] + format_id = '%s_%s' % (format_, fmt['quality']) + preference = None + + if format_ in ('mp4', 'webm'): + ext = format_ + elif format_ == 'rtmp': + ext = 'flv' + elif format_ == 'apple': + ext = 'mp4' + # Some streams have mp3 audio which does not play + # well with ffmpeg filter aac_adtstoasc + preference = -10 + elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests + continue + else: # Other formats not supported yet + continue + + formats.append({ + 'url': video_url, + 'ext': ext, + 'format_id': format_id, + 'quality': quality(fmt.get('quality')), + 'preference': preference, + }) + + title = 
item['title'] + is_live = item['type'] == 'stream' + description = self._og_search_description(webpage, default=None) or self._html_search_meta( + 'description', webpage, 'description', default=None) + timestamp = None + duration = None + if not is_live: + duration = int_or_none(item.get('length')) + timestamp = item.get('published') + if timestamp: + timestamp = parse_iso8601(timestamp[:-5]) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': item.get('image'), + 'duration': duration, + 'timestamp': timestamp, + 'is_live': is_live, + 'formats': formats, + } diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py new file mode 100644 index 0000000..1057bff --- /dev/null +++ b/yt_dlp/extractor/playwire.py @@ -0,0 +1,72 @@ +from .common import InfoExtractor +from ..utils import ( + dict_get, + float_or_none, +) + + +class PlaywireIE(InfoExtractor): + _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)' + _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1'] + + _TESTS = [{ + 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json', + 'md5': 'e6398701e3595888125729eaa2329ed9', + 'info_dict': { + 'id': '3353705', + 'ext': 'mp4', + 'title': 'S04_RM_UCL_Rus', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 145.94, + }, + }, { + # m3u8 in f4m + 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', + 'info_dict': { + 'id': '4840492', + 'ext': 'mp4', + 'title': 'ITV EL SHOW FULL', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # Multiple resolutions while bitrates missing + 'url': 'http://cdn.playwire.com/11625/embed/85228.html', + 'only_matching': True, + }, { + 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json', + 'only_matching': True, + }, { + 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id') + + player = self._download_json( + 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id), + video_id) + + title = player['settings']['title'] + duration = float_or_none(player.get('duration'), 1000) + + content = player['content'] + thumbnail = content.get('poster') + src = content['media']['f4m'] + + formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') + for a_format in formats: + if not dict_get(a_format, ['tbr', 'width', 'height']): + a_format['quality'] = 1 if '-hd.' 
in a_format['url'] else 0 + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py new file mode 100644 index 0000000..809b656 --- /dev/null +++ b/yt_dlp/extractor/pluralsight.py @@ -0,0 +1,491 @@ +import collections +import json +import os +import random +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + dict_get, + ExtractorError, + float_or_none, + int_or_none, + parse_duration, + parse_qs, + qualities, + srt_subtitles_timecode, + try_get, + update_url_query, + urlencode_postdata, +) + + +class PluralsightBaseIE(InfoExtractor): + _API_BASE = 'https://app.pluralsight.com' + + _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE + _GRAPHQL_HEADERS = { + 'Content-Type': 'application/json;charset=UTF-8', + } + _GRAPHQL_COURSE_TMPL = ''' +query BootstrapPlayer { + rpc { + bootstrapPlayer { + profile { + firstName + lastName + email + username + userHandle + authed + isAuthed + plan + } + course(courseId: "%s") { + name + title + courseHasCaptions + translationLanguages { + code + name + } + supportsWideScreenVideoFormats + timestamp + modules { + name + title + duration + formattedDuration + author + authorized + clips { + authorized + clipId + duration + formattedDuration + id + index + moduleIndex + moduleTitle + name + title + watched + } + } + } + } + } +}''' + + def _download_course(self, course_id, url, display_id): + try: + return self._download_course_rpc(course_id, url, display_id) + except ExtractorError: + # Old API fallback + return self._download_json( + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + display_id, data=urlencode_postdata({'courseId': course_id}), + headers={'Referer': url}) + + def _download_course_rpc(self, course_id, url, display_id): + response = self._download_json( + self._GRAPHQL_EP, display_id, data=json.dumps({ + 'query': self._GRAPHQL_COURSE_TMPL % course_id, + 'variables': {} + }).encode('utf-8'), headers=self._GRAPHQL_HEADERS) + + course = try_get( + response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'], + dict) + if course: + return course + + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']['message']), + expected=True) + + +class PluralsightIE(PluralsightBaseIE): + IE_NAME = 'pluralsight' + _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?' 
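+    # All clip coordinates (course, author, module name and clip index) are
+    # carried as query string parameters on the player URL; _real_extract
+    # parses them out with parse_qs() below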
+ _LOGIN_URL = 'https://app.pluralsight.com/id/' + + _NETRC_MACHINE = 'pluralsight' + + _TESTS = [{ + 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', + 'md5': '4d458cf5cf4c593788672419a8dd4cf8', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', + 'ext': 'mp4', + 'title': 'Demo Monitoring', + 'duration': 338, + }, + 'skip': 'Requires pluralsight account credentials', + }, { + 'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live', + 'only_matching': True, + }, { + # available without pluralsight account + 'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started', + 'only_matching': True, + }, { + 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0', + 'only_matching': True, + }] + + GRAPHQL_VIEWCLIP_TMPL = ''' +query viewClip { + viewClip(input: { + author: "%(author)s", + clipIndex: %(clipIndex)d, + courseName: "%(courseName)s", + includeCaptions: %(includeCaptions)s, + locale: "%(locale)s", + mediaType: "%(mediaType)s", + moduleName: "%(moduleName)s", + quality: "%(quality)s" + }) { + urls { + url + cdn + rank + source + }, + status + } +}''' + + def _perform_login(self, username, password): + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'Username': username, + 'Password': password, + }) + + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_URL, group='url') + + if not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + response = self._download_webpage( + post_url, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + error = self._search_regex( + r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + if all(not re.search(p, response) for p in ( + r'__INITIAL_STATE__', r'["\']currentUser["\']', + # new layout? + r'>\s*Sign out\s*<')): + BLOCKED = 'Your account has been blocked due to suspicious activity' + if BLOCKED in response: + raise ExtractorError( + 'Unable to login: %s' % BLOCKED, expected=True) + MUST_AGREE = 'To continue using Pluralsight, you must agree to' + if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): + raise ExtractorError( + 'Unable to login: %s some documents. Go to pluralsight.com, ' + 'log in and agree with what Pluralsight requires.' 
+ % MUST_AGREE, expected=True) + + raise ExtractorError('Unable to log in') + + def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): + captions = None + if clip_id: + captions = self._download_json( + '%s/transcript/api/v1/caption/json/%s/%s' + % (self._API_BASE, clip_id, lang), video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False) + if not captions: + captions_post = { + 'a': author, + 'cn': int(clip_idx), + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/player/retrieve-captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) + if captions: + return { + lang: [{ + 'ext': 'json', + 'data': json.dumps(captions), + }, { + 'ext': 'srt', + 'data': self._convert_subtitles(duration, captions), + }] + } + + @staticmethod + def _convert_subtitles(duration, subs): + srt = '' + TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset') + TEXT_KEYS = ('text', 'Text') + for num, current in enumerate(subs): + current = subs[num] + start, text = ( + float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)), + dict_get(current, TEXT_KEYS)) + if start is None or text is None: + continue + end = duration if num == len(subs) - 1 else float_or_none( + dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False)) + if end is None: + continue + srt += os.linesep.join( + ( + '%d' % num, + '%s --> %s' % ( + srt_subtitles_timecode(start), + srt_subtitles_timecode(end)), + text, + os.linesep, + )) + return srt + + def _real_extract(self, url): + qs = parse_qs(url) + + author = qs.get('author', [None])[0] + name = qs.get('name', [None])[0] + clip_idx = qs.get('clip', [None])[0] + course_name = qs.get('course', [None])[0] + + if any(not f for f in (author, name, clip_idx, course_name,)): + raise ExtractorError('Invalid URL', expected=True) + + display_id = '%s-%s' % (name, clip_idx) + + course = self._download_course(course_name, url, display_id) + + collection = course['modules'] + + clip = None + + for module_ in collection: + if name in (module_.get('moduleName'), module_.get('name')): + for clip_ in module_.get('clips', []): + clip_index = clip_.get('clipIndex') + if clip_index is None: + clip_index = clip_.get('index') + if clip_index is None: + continue + if compat_str(clip_index) == clip_idx: + clip = clip_ + break + + if not clip: + raise ExtractorError('Unable to resolve clip') + + title = clip['title'] + clip_id = clip.get('clipName') or clip.get('name') or clip['clipId'] + + QUALITIES = { + 'low': {'width': 640, 'height': 480}, + 'medium': {'width': 848, 'height': 640}, + 'high': {'width': 1024, 'height': 768}, + 'high-widescreen': {'width': 1280, 'height': 720}, + } + + QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',) + quality_key = qualities(QUALITIES_PREFERENCE) + + AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities']) + + ALLOWED_QUALITIES = ( + AllowedQuality('webm', ['high', ]), + AllowedQuality('mp4', ['low', 'medium', 'high', ]), + ) + + # Some courses also offer widescreen resolution for high quality (see + # https://github.com/ytdl-org/youtube-dl/issues/7766) + widescreen = course.get('supportsWideScreenVideoFormats') is True + best_quality = 'high-widescreen' if widescreen else 'high' + if widescreen: + for allowed_quality in ALLOWED_QUALITIES: + 
+                allowed_quality.qualities.append(best_quality)
+
+        # In order to minimize the number of calls to the ViewClip API and reduce
+        # the probability of being throttled or banned by Pluralsight, we request
+        # only a single format unless a formats listing is explicitly requested.
+        if self.get_param('listformats', False):
+            allowed_qualities = ALLOWED_QUALITIES
+        else:
+            def guess_allowed_qualities():
+                req_format = self.get_param('format') or 'best'
+                req_format_split = req_format.split('-', 1)
+                if len(req_format_split) > 1:
+                    req_ext, req_quality = req_format_split
+                    req_quality = '-'.join(req_quality.split('-')[:2])
+                    for allowed_quality in ALLOWED_QUALITIES:
+                        if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
+                            return (AllowedQuality(req_ext, (req_quality, )), )
+                req_ext = 'webm' if self.get_param('prefer_free_formats') else 'mp4'
+                return (AllowedQuality(req_ext, (best_quality, )), )
+            allowed_qualities = guess_allowed_qualities()
+
+        formats = []
+        for ext, qualities_ in allowed_qualities:
+            for quality in qualities_:
+                f = QUALITIES[quality].copy()
+                clip_post = {
+                    'author': author,
+                    'includeCaptions': 'false',
+                    'clipIndex': int(clip_idx),
+                    'courseName': course_name,
+                    'locale': 'en',
+                    'moduleName': name,
+                    'mediaType': ext,
+                    'quality': '%dx%d' % (f['width'], f['height']),
+                }
+                format_id = '%s-%s' % (ext, quality)
+
+                try:
+                    viewclip = self._download_json(
+                        self._GRAPHQL_EP, display_id,
+                        'Downloading %s viewclip graphql' % format_id,
+                        data=json.dumps({
+                            'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
+                            'variables': {}
+                        }).encode('utf-8'),
+                        headers=self._GRAPHQL_HEADERS)['data']['viewClip']
+                except ExtractorError:
+                    # The old endpoint still works but will most likely go away soon
+                    viewclip = self._download_json(
+                        '%s/video/clips/viewclip' % self._API_BASE, display_id,
+                        'Downloading %s viewclip JSON' % format_id, fatal=False,
+                        data=json.dumps(clip_post).encode('utf-8'),
+                        headers={'Content-Type': 'application/json;charset=utf-8'})
+
+                # Pluralsight tracks multiple sequential calls to the ViewClip API and starts
+                # to return 429 HTTP errors after some time (see
+                # https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover, it may even lead
+                # to an account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842).
+                # To somewhat reduce the probability of these consequences,
+                # we sleep a random amount of time before each call to ViewClip.
+                self._sleep(
+                    random.randint(5, 10), display_id,
+                    '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
+
+                if not viewclip:
+                    continue
+
+                clip_urls = viewclip.get('urls')
+                if not isinstance(clip_urls, list):
+                    continue
+
+                for clip_url_data in clip_urls:
+                    clip_url = clip_url_data.get('url')
+                    if not clip_url:
+                        continue
+                    cdn = clip_url_data.get('cdn')
+                    clip_f = f.copy()
+                    clip_f.update({
+                        'url': clip_url,
+                        'ext': ext,
+                        'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id,
+                        'quality': quality_key(quality),
+                        'source_preference': int_or_none(clip_url_data.get('rank')),
+                    })
+                    formats.append(clip_f)
+
+        duration = int_or_none(
+            clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
+
+        # TODO: other languages?
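+        # Captions are fetched per clip: first from the transcript API and, failing
+        # that, from the legacy retrieve-captions endpoint (see _get_subtitles
+        # above); only English ('en') captions are requested for now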
+ subtitles = self.extract_subtitles( + author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) + + return { + 'id': clip_id, + 'title': title, + 'duration': duration, + 'creator': author, + 'formats': formats, + 'subtitles': subtitles, + } + + +class PluralsightCourseIE(PluralsightBaseIE): + IE_NAME = 'pluralsight:course' + _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)' + _TESTS = [{ + # Free course from Pluralsight Starter Subscription for Microsoft TechNet + # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz + 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas', + 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals', + 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986', + }, + 'playlist_count': 31, + }, { + # available without pluralsight account + 'url': 'https://www.pluralsight.com/courses/angularjs-get-started', + 'only_matching': True, + }, { + 'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents', + 'only_matching': True, + }] + + def _real_extract(self, url): + course_id = self._match_id(url) + + # TODO: PSM cookie + + course = self._download_course(course_id, url, course_id) + + title = course['title'] + course_name = course['name'] + course_data = course['modules'] + description = course.get('description') or course.get('shortDescription') + + entries = [] + for num, module in enumerate(course_data, 1): + author = module.get('author') + module_name = module.get('name') + if not author or not module_name: + continue + for clip in module.get('clips', []): + clip_index = int_or_none(clip.get('index')) + if clip_index is None: + continue + clip_url = update_url_query( + '%s/player' % self._API_BASE, query={ + 'mode': 'live', + 'course': course_name, + 'author': author, + 'name': module_name, + 'clip': clip_index, + }) + entries.append({ + '_type': 'url_transparent', + 'url': clip_url, + 'ie_key': PluralsightIE.ie_key(), + 'chapter': module.get('title'), + 'chapter_number': num, + 'chapter_id': module.get('moduleRef'), + }) + + return self.playlist_result(entries, course_id, title, description) diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py new file mode 100644 index 0000000..5898d92 --- /dev/null +++ b/yt_dlp/extractor/plutotv.py @@ -0,0 +1,195 @@ +import re +import uuid + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + try_get, + url_or_none, +) + + +class PlutoTVIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'''(?x) + https?://(?:www\.)?pluto\.tv(?:/[^/]+)?/on-demand + /(?P<video_type>movies|series) + /(?P<series_or_movie_slug>[^/]+) + (?: + (?:/seasons?/(?P<season_no>\d+))? + (?:/episode/(?P<episode_slug>[^/]+))? + )? 
+ /?(?:$|[#?])''' + + _INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/' + _INFO_QUERY_PARAMS = { + 'appName': 'web', + 'appVersion': 'na', + 'clientID': compat_str(uuid.uuid1()), + 'clientModelNumber': 'na', + 'serverSideAds': 'false', + 'deviceMake': 'unknown', + 'deviceModel': 'web', + 'deviceType': 'web', + 'deviceVersion': 'unknown', + 'sid': compat_str(uuid.uuid1()), + } + _TESTS = [ + { + 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/2/episode/its-in-the-cards-2009-2-3', + 'md5': 'ebcdd8ed89aaace9df37924f722fd9bd', + 'info_dict': { + 'id': '5de6c598e9379ae4912df0a8', + 'ext': 'mp4', + 'title': 'It\'s In The Cards', + 'episode': 'It\'s In The Cards', + 'description': 'The teams face off against each other in a 3-on-2 soccer showdown. Strategy comes into play, though, as each team gets to select their opposing teams’ two defenders.', + 'series': 'I Love Money', + 'season_number': 2, + 'episode_number': 3, + 'duration': 3600, + } + }, { + 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/', + 'playlist_count': 11, + 'info_dict': { + 'id': '5de6c582e9379ae4912dedbd', + 'title': 'I Love Money - Season 1', + } + }, { + 'url': 'https://pluto.tv/on-demand/series/i-love-money/', + 'playlist_count': 26, + 'info_dict': { + 'id': '5de6c582e9379ae4912dedbd', + 'title': 'I Love Money', + } + }, { + 'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1', + 'md5': '3cead001d317a018bf856a896dee1762', + 'info_dict': { + 'id': '5e83ac701fa6a9001bb9df24', + 'ext': 'mp4', + 'title': 'Arrival', + 'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.', + 'duration': 9000, + } + }, { + 'url': 'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1', + 'only_matching': True, + }, { + 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1', + 'only_matching': True, + }, + { + 'url': 'https://pluto.tv/en/on-demand/movies/attack-of-the-killer-tomatoes-1977-1-1-ptv1', + 'md5': '7db56369c0da626a32d505ec6eb3f89f', + 'info_dict': { + 'id': '5b190c7bb0875c36c90c29c4', + 'ext': 'mp4', + 'title': 'Attack of the Killer Tomatoes', + 'description': 'A group of scientists band together to save the world from mutated tomatoes that KILL! 
(1978)', + 'duration': 5700, + } + } + ] + + def _to_ad_free_formats(self, video_id, formats, subtitles): + ad_free_formats, ad_free_subtitles, m3u8_urls = [], {}, set() + for fmt in formats: + res = self._download_webpage( + fmt.get('url'), video_id, note='Downloading m3u8 playlist', + fatal=False) + if not res: + continue + first_segment_url = re.search( + r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$', res, + re.MULTILINE) + if first_segment_url: + m3u8_urls.add( + compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) + continue + first_segment_url = re.search( + r'^(https?://.*/).+\-0+[0-1]0\.ts$', res, + re.MULTILINE) + if first_segment_url: + m3u8_urls.add( + compat_urlparse.urljoin(first_segment_url.group(1), 'master.m3u8')) + continue + + for m3u8_url in m3u8_urls: + fmts, subs = self._extract_m3u8_formats_and_subtitles( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + ad_free_formats.extend(fmts) + ad_free_subtitles = self._merge_subtitles(ad_free_subtitles, subs) + if ad_free_formats: + formats, subtitles = ad_free_formats, ad_free_subtitles + else: + self.report_warning('Unable to find ad-free formats') + return formats, subtitles + + def _get_video_info(self, video_json, slug, series_name=None): + video_id = video_json.get('_id', slug) + formats, subtitles = [], {} + for video_url in try_get(video_json, lambda x: x['stitched']['urls'], list) or []: + if video_url.get('type') != 'hls': + continue + url = url_or_none(video_url.get('url')) + + fmts, subs = self._extract_m3u8_formats_and_subtitles( + url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + + formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles) + + info = { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': video_json.get('name'), + 'description': video_json.get('description'), + 'duration': float_or_none(video_json.get('duration'), scale=1000), + } + if series_name: + info.update({ + 'series': series_name, + 'episode': video_json.get('name'), + 'season_number': int_or_none(video_json.get('season')), + 'episode_number': int_or_none(video_json.get('number')), + }) + return info + + def _real_extract(self, url): + mobj = self._match_valid_url(url).groupdict() + info_slug = mobj['series_or_movie_slug'] + video_json = self._download_json(self._INFO_URL + info_slug, info_slug, query=self._INFO_QUERY_PARAMS) + + if mobj['video_type'] == 'series': + series_name = video_json.get('name', info_slug) + season_number, episode_slug = mobj.get('season_number'), mobj.get('episode_slug') + + videos = [] + for season in video_json['seasons']: + if season_number is not None and season_number != int_or_none(season.get('number')): + continue + for episode in season['episodes']: + if episode_slug is not None and episode_slug != episode.get('slug'): + continue + videos.append(self._get_video_info(episode, episode_slug, series_name)) + if not videos: + raise ExtractorError('Failed to find any videos to extract') + if episode_slug is not None and len(videos) == 1: + return videos[0] + playlist_title = series_name + if season_number is not None: + playlist_title += ' - Season %d' % season_number + return self.playlist_result(videos, + playlist_id=video_json.get('_id', info_slug), + playlist_title=playlist_title) + return self._get_video_info(video_json, info_slug) diff --git a/yt_dlp/extractor/podbayfm.py b/yt_dlp/extractor/podbayfm.py new file mode 100644 index 
0000000..2a26fd2 --- /dev/null +++ b/yt_dlp/extractor/podbayfm.py @@ -0,0 +1,75 @@ +from .common import InfoExtractor +from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call + + +def result_from_props(props, episode_id=None): + return { + 'id': props.get('podcast_id') or episode_id, + 'title': props.get('title'), + 'url': props['mediaURL'], + 'ext': 'mp3', + 'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']), + 'timestamp': props.get('timestamp'), + 'duration': int_or_none(props.get('duration')), + } + + +class PodbayFMIE(InfoExtractor): + _VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$' + _TESTS = [{ + 'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400', + 'md5': '98b41285dcf7989d105a4ed0404054cf', + 'info_dict': { + 'id': '1647338400', + 'title': 'Part One: Kissinger', + 'ext': 'mp3', + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1647338400, + 'duration': 5001, + 'upload_date': '20220315', + }, + }] + + def _real_extract(self, url): + episode_id = self._match_id(url) + webpage = self._download_webpage(url, episode_id) + data = self._search_nextjs_data(webpage, episode_id) + return result_from_props(data['props']['pageProps']['episode'], episode_id) + + +class PodbayFMChannelIE(InfoExtractor): + _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$' + _TESTS = [{ + 'url': 'https://podbay.fm/p/behind-the-bastards', + 'info_dict': { + 'id': 'behind-the-bastards', + 'title': 'Behind the Bastards', + }, + }] + _PAGE_SIZE = 10 + + def _fetch_page(self, channel_id, pagenum): + return self._download_json( + f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}', + channel_id)['podcast'] + + @staticmethod + def _results_from_page(channel_id, page): + return [{ + **result_from_props(e), + 'extractor': PodbayFMIE.IE_NAME, + 'extractor_key': PodbayFMIE.ie_key(), + # somehow they use timestamps as the episode identifier + 'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}', + } for e in page['episodes']] + + def _real_extract(self, url): + channel_id = self._match_id(url) + + first_page = self._fetch_page(channel_id, 0) + entries = OnDemandPagedList( + lambda pagenum: self._results_from_page( + channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page), + self._PAGE_SIZE) + + return self.playlist_result(entries, channel_id, first_page.get('title')) diff --git a/yt_dlp/extractor/podchaser.py b/yt_dlp/extractor/podchaser.py new file mode 100644 index 0000000..fc2d407 --- /dev/null +++ b/yt_dlp/extractor/podchaser.py @@ -0,0 +1,97 @@ +import functools +import json + +from .common import InfoExtractor +from ..utils import ( + OnDemandPagedList, + float_or_none, + str_or_none, + str_to_int, + traverse_obj, + unified_timestamp, +) + + +class PodchaserIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?' + _PAGE_SIZE = 100 + _TESTS = [{ + 'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585', + 'info_dict': { + 'id': '104365585', + 'title': 'Ep. 
285 – freeze me off', + 'description': 'cam ahn', + 'thumbnail': r're:^https?://.*\.jpg$', + 'ext': 'mp3', + 'categories': ['Comedy'], + 'tags': ['comedy', 'dark humor'], + 'series': 'Cum Town', + 'duration': 3708, + 'timestamp': 1636531259, + 'upload_date': '20211110', + 'average_rating': 4.0 + } + }, { + 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853', + 'info_dict': { + 'id': '28853', + 'title': 'The Bone Zone', + 'description': 'Podcast by The Bone Zone', + }, + 'playlist_count': 275 + }, { + 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes', + 'info_dict': { + 'id': '699349', + 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas', + 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1' + }, + 'playlist_mincount': 225 + }] + + @staticmethod + def _parse_episode(episode, podcast): + return { + 'id': str(episode.get('id')), + 'title': episode.get('title'), + 'description': episode.get('description'), + 'url': episode.get('audio_url'), + 'thumbnail': episode.get('image_url'), + 'duration': str_to_int(episode.get('length')), + 'timestamp': unified_timestamp(episode.get('air_date')), + 'average_rating': float_or_none(episode.get('rating')), + 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))), + 'tags': traverse_obj(podcast, ('tags', ..., 'text')), + 'series': podcast.get('title'), + } + + def _call_api(self, path, *args, **kwargs): + return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs) + + def _fetch_page(self, podcast_id, podcast, page): + json_response = self._call_api( + 'list/episode', podcast_id, + headers={'Content-Type': 'application/json;charset=utf-8'}, + data=json.dumps({ + 'start': page * self._PAGE_SIZE, + 'count': self._PAGE_SIZE, + 'sort_order': 'SORT_ORDER_RECENT', + 'filters': { + 'podcast_id': podcast_id + }, + 'options': {} + }).encode()) + + for episode in json_response['entities']: + yield self._parse_episode(episode, podcast) + + def _real_extract(self, url): + podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id') + podcast = self._call_api(f'podcasts/{podcast_id}', episode_id or podcast_id) + if not episode_id: + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE), + str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description')) + + episode = self._call_api(f'episodes/{episode_id}', episode_id) + return self._parse_episode(episode, podcast) diff --git a/yt_dlp/extractor/podomatic.py b/yt_dlp/extractor/podomatic.py new file mode 100644 index 0000000..37b6869 --- /dev/null +++ b/yt_dlp/extractor/podomatic.py @@ -0,0 +1,74 @@ +import json + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PodomaticIE(InfoExtractor): + _WORKING = False + IE_NAME = 'podomatic' + _VALID_URL = r'''(?x) + (?P<proto>https?):// + (?: + (?P<channel>[^.]+)\.podomatic\.com/entry| + (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes + )/ + (?P<id>[^/?#&]+) + ''' + + _TESTS = [{ + 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', + 'md5': '84bb855fcf3429e6bf72460e1eed782d', + 'info_dict': { + 'id': '2009-01-02T16_03_35-08_00', + 'ext': 'mp3', + 'uploader': 'Science Teaching Tips', + 'uploader_id': 'scienceteachingtips', + 'title': '64. 
When the Moon Hits Your Eye',
+            'duration': 446,
+        }
+    }, {
+        'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
+        'md5': 'd2cf443931b6148e27638650e2638297',
+        'info_dict': {
+            'id': '2013-11-15T16_31_21-08_00',
+            'ext': 'mp3',
+            'uploader': 'Ostbahnhof / Techno Mix',
+            'uploader_id': 'ostbahnhof',
+            'title': 'Einunddreizig',
+            'duration': 3799,
+        }
+    }, {
+        'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id = mobj.group('id')
+        channel = mobj.group('channel') or mobj.group('channel_2')
+
+        json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
+                     '?permalink=true&rtmp=0') %
+                    (mobj.group('proto'), channel, video_id))
+        data_json = self._download_webpage(
+            json_url, video_id, 'Downloading video info')
+        data = json.loads(data_json)
+
+        video_url = data['downloadLink']
+        if not video_url:
+            video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation'])
+        uploader = data['podcast']
+        title = data['title']
+        thumbnail = data['imageLocation']
+        duration = int_or_none(data.get('length'), 1000)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'uploader': uploader,
+            'uploader_id': channel,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py
new file mode 100644
index 0000000..0911893
--- /dev/null
+++ b/yt_dlp/extractor/pokemon.py
@@ -0,0 +1,136 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    extract_attributes,
+    int_or_none,
+    js_to_json,
+    merge_dicts,
+)
+
+
+class PokemonIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
+    _TESTS = [{
+        'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
+        'md5': '2fe8eaec69768b25ef898cda9c43062e',
+        'info_dict': {
+            'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
+            'ext': 'mp4',
+            'title': 'The Ol’ Raise and Switch!',
+            'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
+        },
+        'add_ie': ['LimelightMedia'],
+    }, {
+        # no data-video-title
+        'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
+        'info_dict': {
+            'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
+            'ext': 'mp4',
+            'title': "Pokémon : L'ascension de Darkrai",
+            'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
+        },
+        'add_ie': ['LimelightMedia'],
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id, display_id = self._match_valid_url(url).groups()
+        webpage = self._download_webpage(url, video_id or display_id)
+        video_data = extract_attributes(self._search_regex(
+            r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
+            webpage, 'video data element'))
+        video_id = video_data['data-video-id']
+        title = video_data.get('data-video-title') or self._html_search_meta(
+            'pkm-title', webpage, 'title', default=None) or self._search_regex(
+            r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)',
webpage, 'title') + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:%s' % video_id, + 'title': title, + 'description': video_data.get('data-video-summary'), + 'thumbnail': video_data.get('data-video-poster'), + 'series': 'Pokémon', + 'season_number': int_or_none(video_data.get('data-video-season')), + 'episode': title, + 'episode_number': int_or_none(video_data.get('data-video-episode')), + 'ie_key': 'LimelightMedia', + } + + +class PokemonWatchIE(InfoExtractor): + _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})' + _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}' + _TESTS = [{ + 'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667', + 'md5': '62833938a31e61ab49ada92f524c42ff', + 'info_dict': { + 'id': '8309a40969894a8e8d5bc1311e9c5667', + 'ext': 'mp4', + 'title': 'Lillier and the Staff!', + 'description': 'md5:338841b8c21b283d24bdc9b568849f04', + } + }, { + 'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2', + 'only_matching': True + }, { + 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07', + 'only_matching': True + }] + + def _extract_media(self, channel_array, video_id): + for channel in channel_array: + for media in channel.get('media'): + if media.get('id') == video_id: + return media + return None + + def _real_extract(self, url): + video_id = self._match_id(url) + + info = { + '_type': 'url', + 'id': video_id, + 'url': 'limelight:media:%s' % video_id, + 'ie_key': 'LimelightMedia', + } + + # API call can be avoided entirely if we are listing formats + if self.get_param('listformats', False): + return info + + webpage = self._download_webpage(url, video_id) + build_vars = self._parse_json(self._search_regex( + r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'), + video_id, transform_source=js_to_json) + region = build_vars.get('region') + channel_array = self._download_json(self._API_URL.format(region), video_id) + video_data = self._extract_media(channel_array, video_id) + + if video_data is None: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + + info['_type'] = 'url_transparent' + images = video_data.get('images') + + return merge_dicts(info, { + 'title': video_data.get('title'), + 'description': video_data.get('description'), + 'thumbnail': images.get('medium') or images.get('small'), + 'series': 'Pokémon', + 'season_number': int_or_none(video_data.get('season')), + 'episode': video_data.get('title'), + 'episode_number': int_or_none(video_data.get('episode')), + }) diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py new file mode 100644 index 0000000..5c7baad --- /dev/null +++ b/yt_dlp/extractor/pokergo.py @@ -0,0 +1,106 @@ +import base64 + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + try_get, +) + + +class PokerGoBaseIE(InfoExtractor): + _NETRC_MACHINE = 'pokergo' + _AUTH_TOKEN = None + _PROPERTY_ID = '1dfb3940-7d53-4980-b0b0-f28b369a000d' + + def _perform_login(self, username, password): + if self._AUTH_TOKEN: + return + self.report_login() + PokerGoBaseIE._AUTH_TOKEN = self._download_json( + f'https://subscription.pokergo.com/properties/{self._PROPERTY_ID}/sign-in', None, + headers={'authorization': f'Basic {base64.b64encode(f"{username}:{password}".encode()).decode()}'}, + data=b'')['meta']['token'] + if not self._AUTH_TOKEN: + raise ExtractorError('Unable to get Auth 
Token.', expected=True) + + def _real_initialize(self): + if not self._AUTH_TOKEN: + self.raise_login_required(method='password') + + +class PokerGoIE(PokerGoBaseIE): + _VALID_URL = r'https?://(?:www\.)?pokergo\.com/videos/(?P<id>[^&$#/?]+)' + + _TESTS = [{ + 'url': 'https://www.pokergo.com/videos/2a70ec4e-4a80-414b-97ec-725d9b72a7dc', + 'info_dict': { + 'id': 'aVLOxDzY', + 'ext': 'mp4', + 'title': 'Poker After Dark | Season 12 (2020) | Cry Me a River | Episode 2', + 'description': 'md5:c7a8c29556cbfb6eb3c0d5d622251b71', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/aVLOxDzY/poster.jpg?width=720', + 'timestamp': 1608085715, + 'duration': 2700.12, + 'season_number': 12, + 'episode_number': 2, + 'series': 'poker after dark', + 'upload_date': '20201216', + 'season': 'Season 12', + 'episode': 'Episode 2', + 'display_id': '2a70ec4e-4a80-414b-97ec-725d9b72a7dc', + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/videos/{id}', id, + headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] + v_id = data_json['source'] + + thumbnails = [{ + 'url': image['url'], + 'id': image.get('label'), + 'width': image.get('width'), + 'height': image.get('height') + } for image in data_json.get('images') or [] if image.get('url')] + series_json = next(dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == id) or {} + + return { + '_type': 'url_transparent', + 'display_id': id, + 'title': data_json.get('title'), + 'description': data_json.get('description'), + 'duration': data_json.get('duration'), + 'thumbnails': thumbnails, + 'season_number': series_json.get('season'), + 'episode_number': series_json.get('episode_number'), + 'series': try_get(series_json, lambda x: x['tag']['name']), + 'url': f'https://cdn.jwplayer.com/v2/media/{v_id}' + } + + +class PokerGoCollectionIE(PokerGoBaseIE): + _VALID_URL = r'https?://(?:www\.)?pokergo\.com/collections/(?P<id>[^&$#/?]+)' + + _TESTS = [{ + 'url': 'https://www.pokergo.com/collections/19ffe481-5dae-481a-8869-75cc0e3c4700', + 'playlist_mincount': 13, + 'info_dict': { + 'id': '19ffe481-5dae-481a-8869-75cc0e3c4700', + }, + }] + + def _entries(self, id): + data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/collections/{id}?include=entities', + id, headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] + for video in data_json.get('collection_video') or []: + video_id = video.get('id') + if video_id: + yield self.url_result( + f'https://www.pokergo.com/videos/{video_id}', + ie=PokerGoIE.ie_key(), video_id=video_id) + + def _real_extract(self, url): + id = self._match_id(url) + return self.playlist_result(self._entries(id), playlist_id=id) diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py new file mode 100644 index 0000000..1524a1f --- /dev/null +++ b/yt_dlp/extractor/polsatgo.py @@ -0,0 +1,86 @@ +from uuid import uuid4 +import json + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + url_or_none, + ExtractorError, +) + + +class PolsatGoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polsat(?:box)?go\.pl/.+/(?P<id>[0-9a-fA-F]+)(?:[/#?]|$)' + _TESTS = [{ + 'url': 'https://polsatgo.pl/wideo/seriale/swiat-wedlug-kiepskich/5024045/sezon-1/5028300/swiat-wedlug-kiepskich-odcinek-88/4121', + 'info_dict': { + 'id': '4121', + 'ext': 'mp4', + 'title': 'Świat według Kiepskich - Odcinek 88', + 
'age_limit': 12, + }, + }] + + def _extract_formats(self, sources, video_id): + for source in sources or []: + if not source.get('id'): + continue + url = url_or_none(self._call_api( + 'drm', video_id, 'getPseudoLicense', + {'mediaId': video_id, 'sourceId': source['id']}).get('url')) + if not url: + continue + yield { + 'url': url, + 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('navigation', video_id, 'prePlayData', {'mediaId': video_id})['mediaItem'] + + formats = list(self._extract_formats( + try_get(media, lambda x: x['playback']['mediaSources']), video_id)) + + return { + 'id': video_id, + 'title': media['displayInfo']['title'], + 'formats': formats, + 'age_limit': int_or_none(media['displayInfo']['ageGroup']) + } + + def _call_api(self, endpoint, media_id, method, params): + rand_uuid = str(uuid4()) + res = self._download_json( + f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id, + note=f'Downloading {method} JSON metadata', + data=json.dumps({ + 'method': method, + 'id': '2137', + 'jsonrpc': '2.0', + 'params': { + **params, + 'userAgentData': { + 'deviceType': 'mobile', + 'application': 'native', + 'os': 'android', + 'build': 10003, + 'widevine': False, + 'portal': 'pg', + 'player': 'cpplayer', + }, + 'deviceId': { + 'type': 'other', + 'value': rand_uuid, + }, + 'clientId': rand_uuid, + 'cpid': 1, + }, + }).encode('utf-8'), + headers={'Content-type': 'application/json'}) + if not res.get('result'): + if res['error']['code'] == 13404: + raise ExtractorError('This video is either unavailable in your region or is DRM protected', expected=True) + raise ExtractorError(f'Solorz said: {res["error"]["message"]} - {res["error"]["data"]["userMessage"]}') + return res['result'] diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py new file mode 100644 index 0000000..e0b22ff --- /dev/null +++ b/yt_dlp/extractor/polskieradio.py @@ -0,0 +1,610 @@ +import itertools +import json +import math +import re +import urllib.parse + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + InAdvancePagedList, + determine_ext, + extract_attributes, + int_or_none, + js_to_json, + parse_iso8601, + strip_or_none, + traverse_obj, + unescapeHTML, + unified_timestamp, + url_or_none, + urljoin, +) + + +class PolskieRadioBaseExtractor(InfoExtractor): + def _extract_webpage_player_entries(self, webpage, playlist_id, base_data): + media_urls = set() + + for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage): + media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) + if not media.get('file') or not media.get('desc'): + continue + media_url = self._proto_relative_url(media['file']) + if media_url in media_urls: + continue + media_urls.add(media_url) + entry = base_data.copy() + entry.update({ + 'id': compat_str(media['id']), + 'url': media_url, + 'duration': int_or_none(media.get('length')), + 'vcodec': 'none' if media.get('provider') == 'audio' else None, + }) + entry_title = urllib.parse.unquote(media['desc']) + if entry_title: + entry['title'] = entry_title + yield entry + + +class PolskieRadioLegacyIE(PolskieRadioBaseExtractor): + # legacy sites + IE_NAME = 'polskieradio:legacy' + _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/[Aa]rtykul/(?P<id>\d+)' + _TESTS = [{ + 'url': 
'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo', + 'info_dict': { + 'id': '2534482', + 'title': 'Żagaryści. Poezja jak spoiwo', + 'description': 'md5:f18d95d5dcba747a09b635e21a4c0695', + }, + 'playlist': [{ + 'md5': 'd07559829f61d5a93a75755987ded760', + 'info_dict': { + 'id': '2516679', + 'ext': 'mp3', + 'title': 'md5:c6e1234e0b747ad883cb91b7ad06b98c', + 'timestamp': 1592654400, + 'upload_date': '20200620', + 'duration': 1430, + 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' + }, + }], + }, { + # PR4 audition - other frontend + 'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301', + 'info_dict': { + 'id': '2610977', + 'ext': 'mp3', + 'title': 'Pogłos 29 października godz. 23:01', + }, + }, { + 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage, urlh = self._download_webpage_handle(url, playlist_id) + if PolskieRadioIE.suitable(urlh.url): + return self.url_result(urlh.url, PolskieRadioIE, playlist_id) + + content = self._search_regex( + r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>', + webpage, 'content', default=None) + + timestamp = unified_timestamp(self._html_search_regex( + r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>', + webpage, 'timestamp', default=None)) + + thumbnail_url = self._og_search_thumbnail(webpage, default=None) + + title = self._og_search_title(webpage).strip() + + description = strip_or_none(self._og_search_description(webpage, default=None)) + description = description.replace('\xa0', ' ') if description is not None else None + + if not content: + return { + 'id': playlist_id, + 'url': self._proto_relative_url( + self._search_regex( + r"source:\s*'(//static\.prsa\.pl/[^']+)'", + webpage, 'audition record url')), + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + } + + entries = self._extract_webpage_player_entries(content, playlist_id, { + 'title': title, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + }) + + return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioIE(PolskieRadioBaseExtractor): + # new next.js sites + _VALID_URL = r'https?://(?:[^/]+\.)?(?:polskieradio(?:24)?|radiokierowcow)\.pl/artykul/(?P<id>\d+)' + _TESTS = [{ + # articleData, attachments + 'url': 'https://jedynka.polskieradio.pl/artykul/1587943', + 'info_dict': { + 'id': '1587943', + 'title': 'Prof. 
Andrzej Nowak: o historii nie da się myśleć beznamiętnie', + 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', + }, + 'playlist': [{ + 'md5': '2984ee6ce9046d91fc233bc1a864a09a', + 'info_dict': { + 'id': '7a85d429-5356-4def-a347-925e4ae7406b', + 'ext': 'mp3', + 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', + }, + }], + }, { + # post, legacy html players + 'url': 'https://trojka.polskieradio.pl/artykul/2589163,Czy-wciaz-otrzymujemy-zdjecia-z-sond-Voyager', + 'info_dict': { + 'id': '2589163', + 'title': 'Czy wciąż otrzymujemy zdjęcia z sond Voyager?', + 'description': 'md5:cf1a7f348d63a2db9c0d7a63d1669473', + }, + 'playlist': [{ + 'info_dict': { + 'id': '2577880', + 'ext': 'mp3', + 'title': 'md5:a57d10a0c02abd34dd675cb33707ad5a', + 'duration': 321, + }, + }], + }, { + # data, legacy + 'url': 'https://radiokierowcow.pl/artykul/2694529', + 'info_dict': { + 'id': '2694529', + 'title': 'Zielona fala reliktem przeszłości?', + 'description': 'md5:f20a9a7ed9cb58916c54add94eae3bc0', + }, + 'playlist_count': 3, + }, { + 'url': 'https://trojka.polskieradio.pl/artykul/1632955', + 'only_matching': True, + }, { + # with mp4 video + 'url': 'https://trojka.polskieradio.pl/artykul/1634903', + 'only_matching': True, + }, { + 'url': 'https://jedynka.polskieradio.pl/artykul/3042436,Polityka-wschodnia-ojca-i-syna-Wladyslawa-Lokietka-i-Kazimierza-Wielkiego', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + article_data = traverse_obj( + self._search_nextjs_data(webpage, playlist_id), ( + 'props', 'pageProps', (('data', 'articleData'), 'post', 'data')), get_all=False) + + title = strip_or_none(article_data['title']) + + description = strip_or_none(article_data.get('lead')) + + entries = [{ + 'url': entry['file'], + 'ext': determine_ext(entry.get('fileName')), + 'id': self._search_regex( + r'([a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12})', entry['file'], 'entry id'), + 'title': strip_or_none(entry.get('description')) or title, + } for entry in article_data.get('attachments') or () if entry.get('fileType') in ('Audio', )] + + if not entries: + # some legacy articles have no json attachments, but players in body + entries = self._extract_webpage_player_entries(article_data['content'], playlist_id, { + 'title': title, + }) + + return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioAuditionIE(InfoExtractor): + # new next.js sites + IE_NAME = 'polskieradio:audition' + _VALID_URL = r'https?://(?:[^/]+\.)?polskieradio\.pl/audycj[ae]/(?P<id>\d+)' + _TESTS = [{ + # articles, PR1 + 'url': 'https://jedynka.polskieradio.pl/audycje/5102', + 'info_dict': { + 'id': '5102', + 'title': 'Historia żywa', + 'thumbnail': r're:https://static\.prsa\.pl/images/.+', + }, + 'playlist_mincount': 38, + }, { + # episodes, PR1 + 'url': 'https://jedynka.polskieradio.pl/audycje/5769', + 'info_dict': { + 'id': '5769', + 'title': 'AgroFakty', + 'thumbnail': r're:https://static\.prsa\.pl/images/.+', + }, + 'playlist_mincount': 269, + }, { + # both episodes and articles, PR3 + 'url': 'https://trojka.polskieradio.pl/audycja/8906', + 'info_dict': { + 'id': '8906', + 'title': 'Trójka budzi', + 'thumbnail': r're:https://static\.prsa\.pl/images/.+', + }, + 'playlist_mincount': 722, + }, { + # some articles were "promoted to main page" and thus link to old frontend + 'url': 'https://trojka.polskieradio.pl/audycja/305', + 'info_dict': { + 'id': '305', + 'title': 'Co w mowie piszczy?', + 'thumbnail': 
r're:https://static\.prsa\.pl/images/.+', + }, + 'playlist_count': 1523, + }] + + def _call_lp3(self, path, query, video_id, note): + return self._download_json( + f'https://lp3test.polskieradio.pl/{path}', video_id, note, + query=query, headers={'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'}) + + def _entries(self, playlist_id, has_episodes, has_articles): + for i in itertools.count(0) if has_episodes else []: + page = self._call_lp3( + 'AudioArticle/GetListByCategoryId', { + 'categoryId': playlist_id, + 'PageSize': 10, + 'skip': i, + 'format': 400, + }, playlist_id, f'Downloading episode list page {i + 1}') + if not traverse_obj(page, 'data'): + break + for episode in page['data']: + yield { + 'id': str(episode['id']), + 'url': episode['file'], + 'title': episode.get('title'), + 'duration': int_or_none(episode.get('duration')), + 'timestamp': parse_iso8601(episode.get('datePublic')), + } + + for i in itertools.count(0) if has_articles else []: + page = self._call_lp3( + 'Article/GetListByCategoryId', { + 'categoryId': playlist_id, + 'PageSize': 9, + 'skip': i, + 'format': 400, + }, playlist_id, f'Downloading article list page {i + 1}') + if not traverse_obj(page, 'data'): + break + for article in page['data']: + yield { + '_type': 'url_transparent', + 'id': str(article['id']), + 'url': article['url'], + 'title': article.get('shortTitle'), + 'description': traverse_obj(article, ('description', 'lead')), + 'timestamp': parse_iso8601(article.get('datePublic')), + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + page_props = traverse_obj( + self._search_nextjs_data(self._download_webpage(url, playlist_id), playlist_id), + ('props', 'pageProps', ('data', None)), get_all=False) + + has_episodes = bool(traverse_obj(page_props, 'episodes', 'audios')) + has_articles = bool(traverse_obj(page_props, 'articles')) + + return self.playlist_result( + self._entries(playlist_id, has_episodes, has_articles), playlist_id, + title=traverse_obj(page_props, ('details', 'name')), + description=traverse_obj(page_props, ('details', 'description', 'lead')), + thumbnail=traverse_obj(page_props, ('details', 'photo'))) + + +class PolskieRadioCategoryIE(InfoExtractor): + # legacy sites + IE_NAME = 'polskieradio:category' + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/(?:\d+(?:,[^/]+)?/|[^/]+/Tag)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', + 'info_dict': { + 'id': '214', + 'title': 'Muzyka', + }, + 'playlist_mincount': 61 + }, { + # billennium tabs + 'url': 'https://www.polskieradio.pl/8/2385', + 'info_dict': { + 'id': '2385', + 'title': 'Droga przez mąkę', + }, + 'playlist_mincount': 111, + }, { + 'url': 'https://www.polskieradio.pl/10/4930', + 'info_dict': { + 'id': '4930', + 'title': 'Teraz K-pop!', + }, + 'playlist_mincount': 392, + }, { + # post back pages, audio content directly without articles + 'url': 'https://www.polskieradio.pl/8,dwojka/7376,nowa-mowa', + 'info_dict': { + 'id': '7376', + 'title': 'Nowa mowa', + }, + 'playlist_mincount': 244, + }, { + 'url': 'https://www.polskieradio.pl/Krzysztof-Dziuba/Tag175458', + 'info_dict': { + 'id': '175458', + 'title': 'Krzysztof Dziuba', + }, + 'playlist_mincount': 420, + }, { + 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, 
url):
+        return False if PolskieRadioLegacyIE.suitable(url) else super().suitable(url)
+
+    def _entries(self, url, page, category_id):
+        content = page
+        is_billennium_tabs = 'onclick="TB_LoadTab(' in page
+        is_post_back = 'onclick="__doPostBack(' in page
+        pagination = page if is_billennium_tabs else None
+        for page_num in itertools.count(2):
+            for a_entry, entry_id in re.findall(
+                    r'(?s)<article[^>]+>.*?(<a[^>]+href=["\'](?:(?:https?)?://[^/]+)?/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>',
+                    content):
+                entry = extract_attributes(a_entry)
+                if entry.get('href'):
+                    yield self.url_result(
+                        urljoin(url, entry['href']), PolskieRadioLegacyIE, entry_id, entry.get('title'))
+            for a_entry in re.findall(r'<span data-media=({[^ ]+})', content):
+                yield traverse_obj(self._parse_json(a_entry, category_id), {
+                    'url': 'file',
+                    'id': 'uid',
+                    'duration': 'length',
+                    'title': ('title', {urllib.parse.unquote}),
+                    'description': ('desc', {urllib.parse.unquote}),
+                })
+            if is_billennium_tabs:
+                params = self._search_json(
+                    r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+onclick=["\']TB_LoadTab\(',
+                    pagination, 'next page params', category_id, default=None, close_objects=1,
+                    contains_pattern='.+', transform_source=lambda x: '[%s' % js_to_json(unescapeHTML(x)))
+                if not params:
+                    break
+                tab_content = self._download_json(
+                    'https://www.polskieradio.pl/CMS/TemplateBoxesManagement/TemplateBoxTabContent.aspx/GetTabContent',
+                    category_id, f'Downloading page {page_num}', headers={'content-type': 'application/json'},
+                    data=json.dumps(dict(zip((
+                        'boxInstanceId', 'tabId', 'categoryType', 'sectionId', 'categoryId', 'pagerMode',
+                        'subjectIds', 'tagIndexId', 'queryString', 'name', 'openArticlesInParentTemplate',
+                        'idSectionFromUrl', 'maxDocumentAge', 'showCategoryForArticle', 'pageNumber'
+                    ), params))).encode())['d']
+                content, pagination = tab_content['Content'], tab_content.get('PagerContent')
+            elif is_post_back:
+                target = self._search_regex(
+                    r'onclick=(?:["\'])__doPostBack\((?P<q1>["\'])(?P<target>[\w$]+)(?P=q1)\s*,\s*(?P<q2>["\'])Next(?P=q2)',
+                    content, 'pagination postback target', group='target', default=None)
+                if not target:
+                    break
+                content = self._download_webpage(
+                    url, category_id, f'Downloading page {page_num}',
+                    data=urllib.parse.urlencode({
+                        **self._hidden_inputs(content),
+                        '__EVENTTARGET': target,
+                        '__EVENTARGUMENT': 'Next',
+                    }).encode())
+            else:
+                next_url = urljoin(url, self._search_regex(
+                    r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
+                    content, 'next page url', group='url', default=None))
+                if not next_url:
+                    break
+                content = self._download_webpage(next_url, category_id, f'Downloading page {page_num}')
+
+    def _real_extract(self, url):
+        category_id = self._match_id(url)
+        webpage, urlh = self._download_webpage_handle(url, category_id)
+        if PolskieRadioAuditionIE.suitable(urlh.url):
+            return self.url_result(urlh.url, PolskieRadioAuditionIE, category_id)
+        title = self._html_search_regex(
+            r'<title>([^<]+)(?: - [^<]+ - [^<]+| w [Pp]olskie[Rr]adio\.pl\s*)',
+            webpage, 'title', fatal=False)
+        return self.playlist_result(
+            self._entries(url, webpage, category_id),
+            category_id, title)
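The `elif is_post_back:` branch above drives ASP.NET's `__doPostBack` pagination: it scrapes the page's hidden form fields and re-submits them with the postback target set. A minimal standalone sketch of that round-trip, using only the standard library — the `hidden_inputs` regex here is a simplified, attribute-order-dependent stand-in for yt-dlp's more tolerant `_hidden_inputs` helper:

```python
import re
import urllib.parse
import urllib.request


def hidden_inputs(html):
    # Simplified: assumes type= precedes name=/value=; yt-dlp's own
    # _hidden_inputs parses the attributes order-independently.
    return dict(re.findall(
        r'<input[^>]+type=["\']hidden["\'][^>]*\bname=["\']([^"\']+)["\'][^>]*\bvalue=["\']([^"\']*)["\']',
        html))


def next_page(url, html, target):
    # Re-submit the ASP.NET form state with __EVENTTARGET/__EVENTARGUMENT
    # set -- exactly what onclick="__doPostBack('...', 'Next')" does.
    data = urllib.parse.urlencode({
        **hidden_inputs(html),
        '__EVENTTARGET': target,
        '__EVENTARGUMENT': 'Next',
    }).encode()
    with urllib.request.urlopen(urllib.request.Request(url, data=data)) as resp:
        return resp.read().decode('utf-8', 'replace')
```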
+
+
+class PolskieRadioPlayerIE(InfoExtractor):
+    IE_NAME = 'polskieradio:player'
+    _VALID_URL = r'https?://player\.polskieradio\.pl/anteny/(?P<channel>[^/]+)'
+
+    _BASE_URL = 'https://player.polskieradio.pl'
+    _PLAYER_URL = 'https://player.polskieradio.pl/main.bundle.js'
+    _STATIONS_API_URL = 'https://apipr.polskieradio.pl/api/stacje'
+
+    _TESTS = [{
+        'url': 'https://player.polskieradio.pl/anteny/trojka',
+        'info_dict': {
+            'id': '3',
+            'ext': 'm4a',
+            'title': 'Trójka',
+        },
+        'params': {
+            'format': 'bestaudio',
+            'skip_download': 'endless stream',
+        },
+    }]
+
+    def _get_channel_list(self, channel_url='no_channel'):
+        player_code = self._download_webpage(
+            self._PLAYER_URL, channel_url,
+            note='Downloading js player')
+        channel_list = js_to_json(self._search_regex(
+            r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list'))
+        return self._parse_json(channel_list, channel_url)
+
+    def _real_extract(self, url):
+        channel_url = self._match_id(url)
+        channel_list = self._get_channel_list(channel_url)
+
+        channel = next((c for c in channel_list if c.get('url') == channel_url), None)
+
+        if not channel:
+            raise ExtractorError('Channel not found')
+
+        station_list = self._download_json(self._STATIONS_API_URL, channel_url,
+                                           note='Downloading stream url list',
+                                           headers={
+                                               'Accept': 'application/json',
+                                               'Referer': url,
+                                               'Origin': self._BASE_URL,
+                                           })
+        station = next((s for s in station_list
+                        if s.get('Name') == (channel.get('streamName') or channel.get('name'))), None)
+        if not station:
+            raise ExtractorError('Station not found even though the channel was extracted')
+
+        formats = []
+        for stream_url in station['Streams']:
+            stream_url = self._proto_relative_url(stream_url)
+            if stream_url.endswith('/playlist.m3u8'):
+                formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True))
+            elif stream_url.endswith('/manifest.f4m'):
+                formats.extend(self._extract_f4m_formats(stream_url, channel_url))
+            elif stream_url.endswith('/Manifest'):
+                formats.extend(self._extract_ism_formats(stream_url, channel_url))
+            else:
+                formats.append({
+                    'url': stream_url,
+                })
+
+        return {
+            'id': compat_str(channel['id']),
+            'formats': formats,
+            'title': channel.get('name') or channel.get('streamName'),
+            'display_id': channel_url,
+            'thumbnail': f'{self._BASE_URL}/images/{channel_url}-color-logo.png',
+            'is_live': True,
+        }
+
+
+class PolskieRadioPodcastBaseExtractor(InfoExtractor):
+    _API_BASE = 'https://apipodcasts.polskieradio.pl/api'
+
+    def _parse_episode(self, data):
+        return {
+            'id': data['guid'],
+            'formats': [{
+                'url': data['url'],
+                'filesize': int_or_none(data.get('fileSize')),
+            }],
+            'title': data['title'],
+            'description': data.get('description'),
+            'duration': int_or_none(data.get('length')),
+            'timestamp': parse_iso8601(data.get('publishDate')),
+            'thumbnail': url_or_none(data.get('image')),
+            'series': data.get('podcastTitle'),
+            'episode': data['title'],
+        }
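`PolskieRadioPlayerIE._get_channel_list` above has no JSON API to lean on; it pulls an array literal out of the player's webpack bundle with a regex and relaxes it into JSON via `js_to_json`. A toy, self-contained version of the same idea — the bundle snippet and its variable names are invented for illustration, and the key-quoting regex is a rough substitute for the real `js_to_json`:

```python
import json
import re

# Invented stand-in for a webpack bundle that embeds an array literal.
bundle = 'var r="anteny",a=[{url:"trojka",name:"Trójka",id:3}],b=1;'

# Grab the array literal, then relax JS syntax (unquoted keys) into JSON.
array_src = re.search(r'a=(\[.+?\])\s*,', bundle).group(1)
json_src = re.sub(r'([{,])\s*([A-Za-z_]\w*)\s*:', r'\1"\2":', array_src)

channels = json.loads(json_src)
assert channels[0]['url'] == 'trojka'
```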
+
+
+class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor):
+    IE_NAME = 'polskieradio:podcast:list'
+    _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://podcasty.polskieradio.pl/podcast/8/',
+        'info_dict': {
+            'id': '8',
+            'title': 'Śniadanie w Trójce',
+            'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef',
+            'uploader': 'Beata Michniewicz',
+        },
+        'playlist_mincount': 714,
+    }]
+    _PAGE_SIZE = 10
+
+    def _call_api(self, podcast_id, page):
+        return self._download_json(
+            f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}',
+            podcast_id, f'Downloading page {page}')
+
+    def _real_extract(self, url):
+        podcast_id = self._match_id(url)
+        data = self._call_api(podcast_id, 1)
+
+        def get_page(page_num):
+            page_data = self._call_api(podcast_id, page_num + 1) if page_num else data
+            yield from (self._parse_episode(ep) for ep in page_data['items'])
+
+        return {
+            '_type': 'playlist',
+            'entries': InAdvancePagedList(
+                get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE),
+            'id': str(data['id']),
+            'title': data.get('title'),
+            'description': data.get('description'),
+            'uploader': data.get('announcer'),
+        }
+
+
+class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor):
+    IE_NAME = 'polskieradio:podcast'
+    _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P<id>[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})'
+    _TESTS = [{
+        'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32',
+        'info_dict': {
+            'id': '6eafe403-cb8f-4756-b896-4455c3713c32',
+            'ext': 'mp3',
+            'title': 'Theresa May rezygnuje. Co dalej z brexitem?',
+            'description': 'md5:e41c409a29d022b70ef0faa61dbded60',
+            'episode': 'Theresa May rezygnuje. Co dalej z brexitem?',
+            'duration': 2893,
+            'thumbnail': 'https://static.prsa.pl/images/58649376-c8a0-4ba2-a714-78b383285f5f.jpg',
+            'series': 'Raport o stanie świata',
+        },
+    }]
+
+    def _real_extract(self, url):
+        podcast_id = self._match_id(url)
+        data = self._download_json(
+            f'{self._API_BASE}/audio',
+            podcast_id, 'Downloading podcast metadata',
+            data=json.dumps({
+                'guids': [podcast_id],
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json',
+            })
+        return self._parse_episode(data[0])
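`PolskieRadioPodcastListIE` above hands `InAdvancePagedList` a page count computed up front with ceiling division, and reuses the already-fetched first page rather than requesting it twice. The same bookkeeping as a dependency-free sketch — the `{'itemCount': ..., 'items': [...]}` shape mirrors the API response used above, and the fetcher is fake:

```python
import math

PAGE_SIZE = 10

def fake_call_api(page):
    # Stand-in for the real HTTP call; 23 items => pages 1..3.
    total = 23
    start = (page - 1) * PAGE_SIZE
    return {'itemCount': total,
            'items': list(range(start, min(start + PAGE_SIZE, total)))}

first = fake_call_api(1)
page_count = math.ceil(first['itemCount'] / PAGE_SIZE)
assert page_count == 3

def entries():
    # Page 1 was already fetched to learn itemCount; don't fetch it again.
    for page in range(1, page_count + 1):
        data = first if page == 1 else fake_call_api(page)
        yield from data['items']

assert len(list(entries())) == 23
```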
diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py
new file mode 100644
index 0000000..ddc5ec8
--- /dev/null
+++ b/yt_dlp/extractor/popcorntimes.py
@@ -0,0 +1,91 @@
+from .common import InfoExtractor
+from ..compat import compat_b64decode
+from ..utils import int_or_none
+
+
+class PopcorntimesIE(InfoExtractor):
+    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
+        'md5': '93f210991ad94ba8c3485950a2453257',
+        'info_dict': {
+            'id': 'A1XCFvz',
+            'display_id': 'haensel-und-gretel-opera-fantasy',
+            'ext': 'mp4',
+            'title': 'Hänsel und Gretel',
+            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'creator': 'John Paul',
+            'release_date': '19541009',
+            'duration': 4260,
+            'tbr': 5380,
+            'width': 720,
+            'height': 540,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        video_id, display_id = mobj.group('id', 'display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._search_regex(
+            r'<h1>([^<]+)</h1>', webpage, 'title',
+            default=None) or self._html_search_meta(
+            'ya:ovs:original_name', webpage, 'title', fatal=True)
+
+        loc = self._search_regex(
+            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
+            group='value')
+
+        loc_b64 = ''
+        for c in loc:
+            c_ord = ord(c)
+            if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
+                upper = ord('Z') if c_ord <= ord('Z') else ord('z')
+                c_ord += 13
+                if upper < c_ord:
+                    c_ord -= 26
+            loc_b64 += chr(c_ord)
+
+        video_url = compat_b64decode(loc_b64).decode('utf-8')
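The character loop above is plain ROT13 applied to a base64 string: letters are rotated 13 places within their case, while the rest of the base64 alphabet (digits, `+`, `/`, `=`) passes through untouched, and the result is then base64-decoded. An equivalence check against Python's built-in `rot_13` codec:

```python
import base64
import codecs

def rot13_b64decode(loc):
    # codecs' rot_13 shifts a-z/A-Z by 13 and leaves other characters
    # alone, matching the manual ord() arithmetic in the loop above.
    return base64.b64decode(codecs.decode(loc, 'rot_13')).decode('utf-8')

# ROT13 is an involution, so encoding the scrambled value recovers it.
scrambled = codecs.encode(base64.b64encode(b'https://example.com').decode(), 'rot_13')
assert rot13_b64decode(scrambled) == 'https://example.com'
```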
+
+        description = self._html_search_regex(
+            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
+            'description', fatal=False)
+
+        thumbnail = self._search_regex(
+            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
+            webpage, 'thumbnail', default=None,
+            group='value') or self._og_search_thumbnail(webpage)
+
+        creator = self._html_search_meta(
+            'video:director', webpage, 'creator', default=None)
+
+        release_date = self._html_search_meta(
+            'video:release_date', webpage, default=None)
+        if release_date:
+            release_date = release_date.replace('-', '')
+
+        def int_meta(name):
+            return int_or_none(self._html_search_meta(
+                name, webpage, default=None))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'creator': creator,
+            'release_date': release_date,
+            'duration': int_meta('video:duration'),
+            'tbr': int_meta('ya:ovs:bitrate'),
+            'width': int_meta('og:video:width'),
+            'height': int_meta('og:video:height'),
+            'http_headers': {
+                'Referer': url,
+            },
+        }
diff --git a/yt_dlp/extractor/popcorntv.py b/yt_dlp/extractor/popcorntv.py
new file mode 100644
index 0000000..7798462
--- /dev/null
+++ b/yt_dlp/extractor/popcorntv.py
@@ -0,0 +1,72 @@
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    int_or_none,
+    unified_timestamp,
+)
+
+
+class PopcornTVIE(InfoExtractor):
+    _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
+        'md5': '47d65a48d147caf692ab8562fe630b45',
+        'info_dict': {
+            'id': '9183',
+            'display_id': 'food-wars-battaglie-culinarie-episodio-01',
+            'ext': 'mp4',
+            'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
+            'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1497610857,
+            'upload_date': '20170616',
+            'duration': 1440,
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        display_id, video_id = mobj.group('display_id', 'id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        m3u8_url = extract_attributes(
+            self._search_regex(
+                r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
+                webpage, 'content'
+            ))['href']
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+
+        title = self._search_regex(
+            r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
+            'title', default=None) or self._og_search_title(webpage)
+
+        description = self._html_search_regex(
+            r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
+            webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+        timestamp = unified_timestamp(self._html_search_meta(
+            'uploadDate', webpage, 'timestamp'))
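The `duration` lookup just below multiplies by 60 through `int_or_none(..., invscale=60)`, since the site publishes the runtime in minutes. A simplified model of the helper's scaling semantics — yt-dlp's real `int_or_none` also accepts `default` and `get_attr`; this sketch keeps only the arguments used here:

```python
def int_or_none(v, scale=1, invscale=1):
    # Simplified version of yt_dlp.utils.int_or_none: tolerate missing
    # or junk values, otherwise return int(v) * invscale / scale.
    if v is None or v == '':
        return None
    try:
        return int(v) * invscale // scale
    except (TypeError, ValueError):
        return None

assert int_or_none('24', invscale=60) == 1440   # 24 minutes -> seconds
assert int_or_none(None, invscale=60) is None   # missing meta tag
assert int_or_none('n/a') is None               # junk value
```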
+        duration = int_or_none(self._html_search_meta(
+            'duration', webpage), invscale=60)
+        view_count = int_or_none(self._html_search_meta(
+            'interactionCount', webpage, 'view count'))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': view_count,
+            'formats': formats,
+        }
diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py
new file mode 100644
index 0000000..7d16a16
--- /dev/null
+++ b/yt_dlp/extractor/porn91.py
@@ -0,0 +1,95 @@
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+    remove_end,
+    unified_strdate,
+    ExtractorError,
+)
+
+
+class Porn91IE(InfoExtractor):
+    IE_NAME = '91porn'
+    _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
+
+    _TESTS = [{
+        'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
+        'md5': 'd869db281402e0ef4ddef3c38b866f86',
+        'info_dict': {
+            'id': '7e42283b4f5ab36da134',
+            'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
+            'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
+            'ext': 'mp4',
+            'duration': 431,
+            'upload_date': '20150520',
+            'comment_count': int,
+            'view_count': int,
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
+        'md5': 'f8fd50540468a6d795378cd778b40226',
+        'info_dict': {
+            'id': '7ef0cf3d362c699ab91c',
+            'title': '真实空乘,冲上云霄第二部',
+            'description': 'md5:618bf9652cafcc66cd277bd96789baea',
+            'ext': 'mp4',
+            'duration': 248,
+            'upload_date': '20221119',
+            'comment_count': int,
+            'view_count': int,
+            'age_limit': 18,
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        self._set_cookie('91porn.com', 'language', 'cn_CN')
+
+        webpage = self._download_webpage(
+            'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
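Immediately below, the player markup turns out to be hidden inside `document.write(strencode2("..."))`, where the argument is just a percent-encoded `<source>` tag; one regex grabs the encoded string and a second pulls `src` out of the decoded markup. A standalone sketch of that two-step decode, with invented sample data:

```python
import re
import urllib.parse

# Invented example of what the page embeds.
page = ('<script>document.write(strencode2('
        '"%3Csource%20src%3D%22https%3A%2F%2Fcdn.example.com%2Fv.m3u8%22%3E"))</script>')

encoded = re.search(
    r'document\.write\(\s*strencode2\s*\(\s*"([^"]+)"', page).group(1)
markup = urllib.parse.unquote(encoded)  # -> <source src="https://cdn.example.com/v.m3u8">
video_url = re.search(r'src=["\']([^"\']+)["\']', markup).group(1)
assert video_url == 'https://cdn.example.com/v.m3u8'
```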
+        if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
+            raise ExtractorError('91 Porn says: Video does not exist', expected=True)
+
+        daily_limit = self._search_regex(
+            r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
+        if daily_limit:
+            raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
+
+        video_link_url = self._search_regex(
+            r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
+        video_link_url = self._search_regex(
+            r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
+
+        formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
+
+        return {
+            'id': video_id,
+            'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
+            'formats': formats,
+            'subtitles': subtitles,
+            'upload_date': unified_strdate(self._search_regex(
+                r'(\d{4}-\d{2}-\d{2})', webpage, 'upload_date', fatal=False)),
+            'description': self._html_search_regex(
+                r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
+            'duration': parse_duration(self._search_regex(
+                r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
+            'comment_count': int_or_none(self._search_regex(
+                r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
+            'view_count': int_or_none(self._search_regex(
+                r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
+            'age_limit': 18,
+        }
+
+    def _get_formats_and_subtitle(self, video_link_url, video_id):
+        ext = determine_ext(video_link_url)
+        if ext == 'm3u8':
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
+        else:
+            formats = [{'url': video_link_url, 'ext': ext}]
+            subtitles = {}
+
+        return formats, subtitles
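`_get_formats_and_subtitle` above branches purely on the decoded URL's extension: HLS playlists go through the m3u8 helper (which can also carry subtitle renditions), while anything else becomes a single progressive format. The extension test reduces to roughly the following — a simplified stand-in for the real `yt_dlp.utils.determine_ext`:

```python
import urllib.parse


def determine_ext(url, default='mp4'):
    # Minimal sketch: take the suffix of the URL path's last component,
    # ignoring the query string; fall back to a default otherwise.
    path = urllib.parse.urlparse(url).path
    ext = path.rpartition('.')[2]
    return ext if ext and '/' not in ext else default

assert determine_ext('https://cdn.example.com/a/index.m3u8?sig=x') == 'm3u8'
assert determine_ext('https://cdn.example.com/a/video.mp4') == 'mp4'
```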
diff --git a/yt_dlp/extractor/pornbox.py b/yt_dlp/extractor/pornbox.py
new file mode 100644
index 0000000..c381382
--- /dev/null
+++ b/yt_dlp/extractor/pornbox.py
@@ -0,0 +1,113 @@
+from .common import InfoExtractor
+from ..compat import functools
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    qualities,
+    str_or_none,
+    traverse_obj,
+    url_or_none,
+)
+
+
+class PornboxIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://pornbox.com/application/watch-page/212108',
+        'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
+        'info_dict': {
+            'id': '212108',
+            'ext': 'mp4',
+            'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
+            'uploader': 'Lily Strong',
+            'timestamp': 1665871200,
+            'upload_date': '20221015',
+            'age_limit': 18,
+            'availability': 'needs_auth',
+            'duration': 1505,
+            'cast': ['Lily Strong', 'John Strong'],
+            'tags': 'count:11',
+            'description': 'md5:589c7f33e183aa8aa939537300efb859',
+            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$'
+        }
+    }, {
+        'url': 'https://pornbox.com/application/watch-page/216045',
+        'info_dict': {
+            'id': '216045',
+            'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
+            'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
+            'uploader': 'VK Studio',
+            'timestamp': 1618264800,
+            'upload_date': '20210412',
+            'age_limit': 18,
+            'availability': 'premium_only',
+            'duration': 2710,
+            'cast': 'count:3',
+            'tags': 'count:29',
+            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
+            'subtitles': 'count:6'
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True
+        },
+        'expected_warnings': [
+            'You are either not logged in or do not have access to this scene',
+            'No video formats found', 'Requested format is not available']
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)
+
+        subtitles = {country_code: [{
+            'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
+            'ext': 'srt'
+        }] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}
+
+        is_free_scene = traverse_obj(
+            public_data, ('price', 'is_available_for_free', {bool}), default=False)
+
+        metadata = {
+            'id': video_id,
+            **traverse_obj(public_data, {
+                'title': ('scene_name', {str.strip}),
+                'description': ('small_description', {str.strip}),
+                'uploader': 'studio',
+                'duration': ('runtime', {parse_duration}),
+                'cast': (('models', 'male_models'), ..., 'model_name'),
+                'thumbnail': ('player_poster', {url_or_none}),
+                'tags': ('niches', ..., 'niche'),
+            }),
+            'age_limit': 18,
+            'timestamp': parse_iso8601(traverse_obj(
+                public_data, ('studios', 'release_date'), 'publish_date')),
+            'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
+            'subtitles': subtitles,
+        }
+
+        if not public_data.get('is_purchased') or not is_free_scene:
+            self.raise_login_required(
+                'You are either not logged in or do not have access to this scene', metadata_available=True)
+            return metadata
+
+        media_id = traverse_obj(public_data, (
+            'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
+        if not media_id:
+            self.raise_no_formats('Could not find stream id', video_id=video_id)
+
+        stream_data = self._download_json(
+            f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')
+
+        get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
+        metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
+            'url': 'src',
+            'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
+            'format_id': ('quality', {str_or_none}),
+            'quality': ('quality', {get_quality}),
+            'width': ('size', {lambda x: int(x[:-1])}),
+        }))
+
+        return metadata
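The format map above relies on yt-dlp's `qualities()` helper: given labels ordered worst-to-best, it returns a ranking function the format sorter can use as a preference key. A minimal reimplementation under that same ordering convention:

```python
def qualities(ordered):
    # Worst-to-best list -> rank function; unknown labels rank lowest
    # (-1), mirroring yt_dlp.utils.qualities.
    def q(label):
        try:
            return ordered.index(label)
        except ValueError:
            return -1
    return q

get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
fmts = [{'format_id': 'vga'}, {'format_id': '4k'}, {'format_id': 'hd'}]
best_first = sorted(fmts, key=lambda f: get_quality(f['format_id']), reverse=True)
assert [f['format_id'] for f in best_first] == ['4k', 'hd', 'vga']
```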
diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py
new file mode 100644
index 0000000..51a9cf3
--- /dev/null
+++ b/yt_dlp/extractor/pornflip.py
@@ -0,0 +1,77 @@
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601
+)
+
+
+class PornFlipIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P<id>[^/]+)'
+    _TESTS = [
+        {
+            'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou',
+            'info_dict': {
+                'id': 'dzv9Mtw1qj2',
+                'ext': 'mp4',
+                'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou',
+                'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821',
+                'duration': 476,
+                'like_count': int,
+                'dislike_count': int,
+                'view_count': int,
+                'timestamp': 1617846819,
+                'upload_date': '20210408',
+                'uploader': 'Brazzers',
+                'age_limit': 18,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'https://www.pornflip.com/v/IrJEC40i21L',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU',
+            'only_matching': True,
+        },
+    ]
+    _HOST = 'www.pornflip.com'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST})
+        description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False)
+        duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False)
+        view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False)
+        title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)</a>', webpage, 'title', fatal=False)
+        uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*<a[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
+        upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False)
+        likes = self._html_search_regex(
+            r'class="btn btn-up-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'like_count', fatal=False)
+        dislikes = self._html_search_regex(
+            r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
+        mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
+        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
+
+        return {
+            'age_limit': 18,
+            'description': description,
+            'dislike_count': int_or_none(dislikes),
+            'duration': parse_duration(duration),
+            'formats': formats,
+            'id': video_id,
+            'like_count': int_or_none(likes),
+            'timestamp': parse_iso8601(upload_date),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'uploader': uploader,
+            'view_count': int_or_none(view_count),
+        }
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
new file mode 100644
index 0000000..29a3e43
--- /dev/null
+++ b/yt_dlp/extractor/pornhub.py
@@ -0,0 +1,825 @@
+import functools
+import itertools
+import math
+import operator
+import re
+
+from .common import InfoExtractor
+from .openload import PhantomJSwrapper
+from ..compat import compat_str
+from ..networking import Request
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    NO_DEFAULT,
+    ExtractorError,
+    clean_html,
+    determine_ext,
+    format_field,
+    int_or_none,
+    merge_dicts,
+    orderedSet,
+    remove_quotes,
+    remove_start,
+    str_to_int,
+    update_url_query,
+    url_or_none,
+    urlencode_postdata,
+)
+
+
+class PornHubBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'pornhub'
+    _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        def dl(*args, **kwargs):
+            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+        ret = dl(*args, **kwargs)
+
+        if not ret:
+            return ret
+
+        webpage, urlh = ret
+
+        if any(re.search(p, webpage) for p in (
+                r'<body\b[^>]+\bonload=["\']go\(\)',
+                r'document\.cookie\s*=\s*["\']RNKEY=',
+                r'document\.location\.reload\(true\)')):
+            url_or_request = args[0]
+            url = (url_or_request.url
+                   if isinstance(url_or_request, Request)
+                   else url_or_request)
+            phantom = PhantomJSwrapper(self, required_version='2.0')
+            phantom.get(url, html=webpage)
+            webpage, urlh = dl(*args, **kwargs)
+
+        return webpage, urlh
+
+    def _real_initialize(self):
+        self._logged_in = False
+
+    def _set_age_cookies(self, host):
+        self._set_cookie(host, 'age_verified', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
+        self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
+        self._set_cookie(host, 'accessPH', '1')
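`_download_webpage_handle` above sniffs three markers of the site's JavaScript anti-bot interstitial and, if any match, renders the page once through PhantomJS so the computed cookies land in the jar before retrying. The detection itself is just an any-of-regexes test; a condensed, checkable sketch:

```python
import re

CHALLENGE_PATTERNS = (
    r'<body\b[^>]+\bonload=["\']go\(\)',    # onload bootstrap
    r'document\.cookie\s*=\s*["\']RNKEY=',  # JS-computed cookie
    r'document\.location\.reload\(true\)',  # forced reload
)

def looks_like_js_challenge(html):
    return any(re.search(p, html) for p in CHALLENGE_PATTERNS)

assert looks_like_js_challenge(
    '<script>document.cookie="RNKEY=abc";document.location.reload(true)</script>')
assert not looks_like_js_challenge('<html><body>regular page</body></html>')
```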
+
+    def _login(self, host):
+        if self._logged_in:
+            return
+
+        site = host.split('.')[0]
+
+        # Both sites pornhub and pornhubpremium have separate accounts
+        # so there should be an option to provide credentials for both.
+        # At the same time some videos are available under the same video id
+        # on both sites so that we have to identify them as the same video.
+        # For that purpose we have to keep both in the same extractor
+        # but under different netrc machines.
+        username, password = self._get_login_info(netrc_machine=site)
+        if username is None:
+            return
+
+        login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
+        login_page = self._download_webpage(
+            login_url, None, 'Downloading %s login page' % site)
+
+        def is_logged(webpage):
+            return any(re.search(p, webpage) for p in (
+                r'id="profileMenuDropdown"',
+                r'class="ph-icon-logout"'))
+
+        if is_logged(login_page):
+            self._logged_in = True
+            return
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        response = self._download_json(
+            'https://www.%s/front/authenticate' % host, None,
+            'Logging in to %s' % site,
+            data=urlencode_postdata(login_form),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                'Referer': login_url,
+                'X-Requested-With': 'XMLHttpRequest',
+            })
+
+        if response.get('success') == '1':
+            self._logged_in = True
+            return
+
+        message = response.get('message')
+        if message is not None:
+            raise ExtractorError(
+                'Unable to login: %s' % message, expected=True)
+
+        raise ExtractorError('Unable to log in')
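`_login` above is a conventional scripted web login: fetch the form page, short-circuit if the session is already authenticated, then echo the hidden fields back together with the credentials as an XHR-style POST. A schematic of the request it ends up building — the endpoint and header names are taken from the code above, while `build_login_request` itself is a hypothetical helper and session/cookie handling is left to the caller:

```python
import urllib.parse
import urllib.request

def build_login_request(host, hidden_fields, username, password):
    # hidden_fields: dict scraped from the login page (CSRF token etc.).
    form = dict(hidden_fields, username=username, password=password)
    return urllib.request.Request(
        f'https://www.{host}/front/authenticate',
        data=urllib.parse.urlencode(form).encode(),
        headers={
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Referer': f'https://www.{host}/login',      # simplified Referer
            'X-Requested-With': 'XMLHttpRequest',        # server answers JSON for XHR
        })

req = build_login_request('pornhub.com', {'token': 'abc123'}, 'user', 'hunter2')
assert req.get_method() == 'POST'
```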
+
+
+class PornHubIE(PornHubBaseIE):
+    IE_DESC = 'PornHub and Thumbzilla'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:[^/]+\.)?
+                            %s
+                            /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+                            (?:www\.)?thumbzilla\.com/video/
+                        )
+                        (?P<id>[\da-z]+)
+                    ''' % PornHubBaseIE._PORNHUB_HOST_RE
+    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
+        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
+        'info_dict': {
+            'id': '648719015',
+            'ext': 'mp4',
+            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
+            'uploader': 'Babes',
+            'upload_date': '20130628',
+            'timestamp': 1372447216,
+            'duration': 361,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+            'tags': list,
+            'categories': list,
+            'cast': list,
+        },
+    }, {
+        # non-ASCII title
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
+        'info_dict': {
+            'id': '1331683002',
+            'ext': 'mp4',
+            'title': '重庆婷婷女王足交',
+            'upload_date': '20150213',
+            'timestamp': 1423804862,
+            'duration': 1753,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+            'tags': list,
+            'categories': list,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
+    }, {
+        # subtitles
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
+        'info_dict': {
+            'id': 'ph5af5fef7c2aa7',
+            'ext': 'mp4',
+            'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
+            'uploader': 'BFFs',
+            'duration': 622,
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
+            'age_limit': 18,
+            'tags': list,
+            'categories': list,
+            'subtitles': {
+                'en': [{
+                    'ext': 'srt'
+                }]
+            },
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'This video has been disabled',
+    }, {
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
+        'info_dict': {
+            'id': 'ph601dc30bae19a',
+            'uploader': 'Projekt Melody',
+            'uploader_id': 'projekt-melody',
+            'upload_date': '20210205',
+            'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
+            'thumbnail': r're:https?://.+',
+        },
+    }, {
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
+        'only_matching': True,
+    }, {
+        # removed at the request of cam4.com
+        'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
+        'only_matching': True,
+    }, {
+        # removed at the request of the copyright owner
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
+        'only_matching': True,
+    }, {
+        # removed by uploader
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
+        'only_matching': True,
+    }, {
+        # private video
+        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
+        'only_matching': True,
+    }, {
+        # Some videos are available with the same id on both premium
+        # and non-premium sites (e.g. this and the following test)
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
+        'only_matching': True,
+    }, {
+        # geo restricted
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
+    }, {
+        'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
+    }]
+
+    def _extract_count(self, pattern, webpage, name):
+        return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None))
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        host = mobj.group('host') or 'pornhub.com'
+        video_id = mobj.group('id')
+
+        self._login(host)
+        self._set_age_cookies(host)
+
+        def dl_webpage(platform):
+            self._set_cookie(host, 'platform', platform)
+            return self._download_webpage(
+                'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
+                video_id, 'Downloading %s webpage' % platform)
+
+        webpage = dl_webpage('pc')
+
+        error_msg = self._html_search_regex(
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
+            webpage, 'error message', default=None, group='error')
+        if error_msg:
+            error_msg = re.sub(r'\s+', ' ', error_msg)
+            raise ExtractorError(
+                'PornHub said: %s' % error_msg,
+                expected=True, video_id=video_id)
+
+        if any(re.search(p, webpage) for p in (
+                r'class=["\']geoBlocked["\']',
+                r'>\s*This content is unavailable in your country')):
+            self.raise_geo_restricted()
+
+        # video_title from flashvars contains whitespace instead of non-ASCII (see
+        # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
+        # on that anymore.
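Because the flashvars title is unreliable (see the comment above), the extraction that follows works through an ordered list of sources — a `twitter:title` meta tag first, then on-page markup — and keeps the first hit. That cascade pattern in isolation, with a made-up page snippet:

```python
import re

def first_match(patterns, html):
    # Try progressively weaker sources; return the first group that matches.
    for pattern in patterns:
        m = re.search(pattern, html, re.DOTALL)
        if m:
            return m.group('title')
    return None

html = '<h1 class="title">My Video</h1>'
title = first_match((
    r'<meta[^>]+name=["\']twitter:title["\'][^>]+content=["\'](?P<title>[^"\']+)',
    r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
), html)
assert title == 'My Video'
```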
+ title = self._html_search_meta( + 'twitter:title', webpage, default=None) or self._html_search_regex( + (r'(?s)]+class=["\']title["\'][^>]*>(?P.+?)</h1>', + r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1', + r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', group='title') + + video_urls = [] + video_urls_set = set() + subtitles = {} + + flashvars = self._parse_json( + self._search_regex( + r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), + video_id) + if flashvars: + subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) + if subtitle_url: + subtitles.setdefault('en', []).append({ + 'url': subtitle_url, + 'ext': 'srt', + }) + thumbnail = flashvars.get('image_url') + duration = int_or_none(flashvars.get('video_duration')) + media_definitions = flashvars.get('mediaDefinitions') + if isinstance(media_definitions, list): + for definition in media_definitions: + if not isinstance(definition, dict): + continue + video_url = definition.get('videoUrl') + if not video_url or not isinstance(video_url, compat_str): + continue + if video_url in video_urls_set: + continue + video_urls_set.add(video_url) + video_urls.append( + (video_url, int_or_none(definition.get('quality')))) + else: + thumbnail, duration = [None] * 2 + + def extract_js_vars(webpage, pattern, default=NO_DEFAULT): + assignments = self._search_regex( + pattern, webpage, 'encoded url', default=default) + if not assignments: + return {} + + assignments = assignments.split(';') + + js_vars = {} + + def parse_js_value(inp): + inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) + if '+' in inp: + inps = inp.split('+') + return functools.reduce( + operator.concat, map(parse_js_value, inps)) + inp = inp.strip() + if inp in js_vars: + return js_vars[inp] + return remove_quotes(inp) + + for assn in assignments: + assn = assn.strip() + if not assn: + continue + assn = re.sub(r'var\s+', '', assn) + vname, value = assn.split('=', 1) + js_vars[vname] = parse_js_value(value) + return js_vars + + def add_video_url(video_url): + v_url = url_or_none(video_url) + if not v_url: + return + if v_url in video_urls_set: + return + video_urls.append((v_url, None)) + video_urls_set.add(v_url) + + def parse_quality_items(quality_items): + q_items = self._parse_json(quality_items, video_id, fatal=False) + if not isinstance(q_items, list): + return + for item in q_items: + if isinstance(item, dict): + add_video_url(item.get('url')) + + if not video_urls: + FORMAT_PREFIXES = ('media', 'quality', 'qualityItems') + js_vars = extract_js_vars( + webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), + default=None) + if js_vars: + for key, format_url in js_vars.items(): + if key.startswith(FORMAT_PREFIXES[-1]): + parse_quality_items(format_url) + elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]): + add_video_url(format_url) + if not video_urls and re.search( + r'<[^>]+\bid=["\']lockedPlayer', webpage): + raise ExtractorError( + 'Video %s is locked' % video_id, expected=True) + + if not video_urls: + js_vars = extract_js_vars( + dl_webpage('tv'), r'(var.+?mediastring.+?)</script>') + add_video_url(js_vars['mediastring']) + + for mobj in re.finditer( + r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage): + video_url = mobj.group('url') + if video_url not in video_urls_set: + video_urls.append((video_url, None)) + video_urls_set.add(video_url) + + upload_date = None + formats = [] + + def add_format(format_url, height=None): + ext = 
determine_ext(format_url) + if ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + return + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + return + if not height: + height = int_or_none(self._search_regex( + r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height', + default=None)) + formats.append({ + 'url': format_url, + 'format_id': format_field(height, None, '%dp'), + 'height': height, + }) + + for video_url, height in video_urls: + if not upload_date: + upload_date = self._search_regex( + r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) + if upload_date: + upload_date = upload_date.replace('/', '') + if '/video/get_media' in video_url: + medias = self._download_json(video_url, video_id, fatal=False) + if isinstance(medias, list): + for media in medias: + if not isinstance(media, dict): + continue + video_url = url_or_none(media.get('videoUrl')) + if not video_url: + continue + height = int_or_none(media.get('quality')) + add_format(video_url, height) + continue + add_format(video_url) + + model_profile = self._search_json( + r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False) + video_uploader = self._html_search_regex( + r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', + webpage, 'uploader', default=None) or model_profile.get('username') + + def extract_vote_count(kind, name): + return self._extract_count( + (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind, + r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind), + webpage, name) + + view_count = self._extract_count( + r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') + like_count = extract_vote_count('Up', 'like') + dislike_count = extract_vote_count('Down', 'dislike') + comment_count = self._extract_count( + r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') + + def extract_list(meta_key): + div = self._search_regex( + r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' + % meta_key, webpage, meta_key, default=None) + if div: + return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)] + + info = self._search_json_ld(webpage, video_id, default={}) + # description provided in JSON-LD is irrelevant + info['description'] = None + + return merge_dicts({ + 'id': video_id, + 'uploader': video_uploader, + 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'), + 'upload_date': upload_date, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'comment_count': comment_count, + 'formats': formats, + 'age_limit': 18, + 'tags': extract_list('tags'), + 'categories': extract_list('categories'), + 'cast': extract_list('pornstars'), + 'subtitles': subtitles, + }, info) + + +class PornHubPlaylistBaseIE(PornHubBaseIE): + def _extract_page(self, url): + return int_or_none(self._search_regex( + r'\bpage=(\d+)', url, 'page', default=None)) + + def _extract_entries(self, webpage, host): + # Only process container div with main playlist content skipping + # drop-down menu that uses similar pattern for videos (see + # https://github.com/ytdl-org/youtube-dl/issues/11594). 
+ container = self._search_regex( + r'(?s)(<div[^>]+class=["\']container.+)', webpage, + 'container', default=webpage) + + return [ + self.url_result( + 'http://www.%s/%s' % (host, video_url), + PornHubIE.ie_key(), video_title=title) + for video_url, title in orderedSet(re.findall( + r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', + container)) + ] + + +class PornHubUserIE(PornHubPlaylistBaseIE): + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph', + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious', + 'info_dict': { + 'id': 'liz-vicious', + }, + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/users/russianveet69', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/channels/povd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', + 'only_matching': True, + }, { + # Unavailable via /videos page, but available with direct pagination + # on pornstar page (see [1]), requires premium + # 1. https://github.com/ytdl-org/youtube-dl/issues/27853 + 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west', + 'only_matching': True, + }, { + # Same as before, multi page + 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau', + 'only_matching': True, + }, { + 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + user_id = mobj.group('id') + videos_url = '%s/videos' % mobj.group('url') + self._set_age_cookies(mobj.group('host')) + page = self._extract_page(url) + if page: + videos_url = update_url_query(videos_url, {'page': page}) + return self.url_result( + videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) + + +class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + <li[^>]+\bclass=["\']page_next| + <link[^>]+\brel=["\']next| + <button[^>]+\bid=["\']moreDataBtn + ''', webpage) is not None + + def _entries(self, url, host, item_id): + page = self._extract_page(url) + + VIDEOS = '/videos' + + def download_page(base_url, num, fallback=False): + note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '') + return self._download_webpage( + base_url, item_id, note, query={'page': num}) + + def is_404(e): + return isinstance(e.cause, HTTPError) and e.cause.status == 404 + + base_url = url + has_page = page is not None + first_page = page if has_page else 1 + for page_num in (first_page, ) if has_page else itertools.count(first_page): + try: + try: + webpage = download_page(base_url, page_num) + except ExtractorError as e: + # Some sources may not be available via /videos page, + # trying to fallback to main page pagination (see [1]) + # 1. 
https://github.com/ytdl-org/youtube-dl/issues/27853 + if is_404(e) and page_num == first_page and VIDEOS in base_url: + base_url = base_url.replace(VIDEOS, '') + webpage = download_page(base_url, page_num, fallback=True) + else: + raise + except ExtractorError as e: + if is_404(e) and page_num != first_page: + break + raise + page_entries = self._extract_entries(webpage, host) + if not page_entries: + break + for e in page_entries: + yield e + if not self._has_more(webpage): + break + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + host = mobj.group('host') + item_id = mobj.group('id') + + self._login(host) + self._set_age_cookies(host) + + return self.playlist_result(self._entries(url, host, item_id), item_id) + + +class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph/videos', + 'only_matching': True, + }, { + 'url': 'http://www.pornhub.com/users/rushandlia/videos', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', + 'info_dict': { + 'id': 'pornstar/jenny-blighe/videos', + }, + 'playlist_mincount': 149, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', + 'info_dict': { + 'id': 'pornstar/jenny-blighe/videos', + }, + 'playlist_mincount': 40, + }, { + # default sorting as Top Rated Videos + 'url': 'https://www.pornhub.com/channels/povd/videos', + 'info_dict': { + 'id': 'channels/povd/videos', + }, + 'playlist_mincount': 293, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra', + 'only_matching': True, + }, { + # Most Recent Videos + 'url': 'https://www.pornhub.com/channels/povd/videos?o=da', + 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi', + 'only_matching': True, + }, { + 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', + 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', + 'only_matching': True, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', + 'only_matching': True, + }, { + # Longest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', + 'only_matching': True, + }, { + # Newest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/search?search=123', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video?page=2', + 'only_matching': True, + }, { + 'url': 
'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', + 'only_matching': True, + }, { + 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return (False + if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubPagedVideoListIE, cls).suitable(url)) + + +class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE + _TESTS = [{ + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 129, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, + }, { + 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload', + 'only_matching': True, + }] + + +class PornHubPlaylistIE(PornHubPlaylistBaseIE): + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE + _TESTS = [{ + 'url': 'https://www.pornhub.com/playlist/44121572', + 'info_dict': { + 'id': '44121572', + }, + 'playlist_count': 77, + }, { + 'url': 'https://www.pornhub.com/playlist/4667351', + 'only_matching': True, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351', + 'only_matching': True, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351?page=2', + 'only_matching': True, + }] + + def _entries(self, url, host, item_id): + webpage = self._download_webpage(url, item_id, 'Downloading page 1') + playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id') + video_count = int_or_none( + self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count')) + token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token') + page_count = math.ceil((video_count - 36) / 40.) 
+ 1 + page_entries = self._extract_entries(webpage, host) + + def download_page(page_num): + note = 'Downloading page {}'.format(page_num) + page_url = 'https://www.{}/playlist/viewChunked'.format(host) + return self._download_webpage(page_url, item_id, note, query={ + 'id': playlist_id, + 'page': page_num, + 'token': token, + }) + + for page_num in range(1, page_count + 1): + if page_num > 1: + webpage = download_page(page_num) + page_entries = self._extract_entries(webpage, host) + if not page_entries: + break + for e in page_entries: + yield e + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + host = mobj.group('host') + item_id = mobj.group('id') + + self._login(host) + self._set_age_cookies(host) + + return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id) diff --git a/yt_dlp/extractor/pornotube.py b/yt_dlp/extractor/pornotube.py new file mode 100644 index 0000000..e0960f4 --- /dev/null +++ b/yt_dlp/extractor/pornotube.py @@ -0,0 +1,83 @@ +import json + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PornotubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science', + 'md5': '60fc5a4f0d93a97968fc7999d98260c9', + 'info_dict': { + 'id': '4964', + 'ext': 'mp4', + 'upload_date': '20141203', + 'title': 'Weird Hot and Wet Science', + 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0', + 'categories': ['Adult Humor', 'Blondes'], + 'uploader': 'Alpha Blue Archives', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1417582800, + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + token = self._download_json( + 'https://api.aebn.net/auth/v2/origins/authenticate', + video_id, note='Downloading token', + data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + 'Origin': 'http://www.pornotube.com', + })['tokenKey'] + + video_url = self._download_json( + 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id, + video_id, note='Downloading delivery information', + headers={'Authorization': token})['mediaUrl'] + + FIELDS = ( + 'title', 'description', 'startSecond', 'endSecond', 'publishDate', + 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber' + ) + + info = self._download_json( + 'https://api.aebn.net/content/v2/clips/%s?fields=%s' + % (video_id, ','.join(FIELDS)), video_id, + note='Downloading metadata', + headers={'Authorization': token}) + + if isinstance(info, list): + info = info[0] + + title = info['title'] + + timestamp = int_or_none(info.get('publishDate'), scale=1000) + uploader = info.get('studios', [{}])[0].get('name') + movie_id = info.get('movieId') + primary_image_number = info.get('primaryImageNumber') + thumbnail = None + if movie_id and primary_image_number: + thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % ( + movie_id, movie_id, primary_image_number) + start = int_or_none(info.get('startSecond')) + end = int_or_none(info.get('endSecond')) + duration = end - start if start and end else None + categories = [c['name'] for c in info.get('categories', []) if c.get('name')] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': info.get('description'), + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'thumbnail': thumbnail, + 'categories': 
categories, + 'age_limit': 18, + } diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py new file mode 100644 index 0000000..2e51b4f --- /dev/null +++ b/yt_dlp/extractor/pornovoisines.py @@ -0,0 +1,103 @@ +from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + unified_strdate, +) + + +class PornoVoisinesIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)' + + _TEST = { + 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', + 'md5': '6f8aca6a058592ab49fe701c8ba8317b', + 'info_dict': { + 'id': '919', + 'display_id': 'recherche-appartement', + 'ext': 'mp4', + 'title': 'Recherche appartement', + 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20140925', + 'duration': 120, + 'view_count': int, + 'average_rating': float, + 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], + 'age_limit': 18, + 'subtitles': { + 'fr': [{ + 'ext': 'vtt', + }] + }, + } + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + settings_url = self._download_json( + 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, + video_id, note='Getting settings URL')['video_settings_url'] + settings = self._download_json(settings_url, video_id)['data'] + + formats = [] + for kind, data in settings['variants'].items(): + if kind == 'HLS': + formats.extend(self._extract_m3u8_formats( + data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + elif kind == 'MP4': + for item in data: + formats.append({ + 'url': item['url'], + 'height': item.get('height'), + 'bitrate': item.get('bitrate'), + }) + + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + # The webpage has a bug - there's no space between "thumb" and src= + thumbnail = self._html_search_regex( + r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2', + webpage, 'thumbnail', fatal=False, group='url') + + upload_date = unified_strdate(self._search_regex( + r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False)) + duration = settings.get('main', {}).get('duration') + view_count = int_or_none(self._search_regex( + r'(\d+) vues', webpage, 'view count', fatal=False)) + average_rating = self._search_regex( + r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False) + if average_rating: + average_rating = float_or_none(average_rating.replace(',', '.')) + + categories = self._html_search_regex( + r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False) + if categories: + categories = [category.strip() for category in categories.split(',')] + + subtitles = {'fr': [{ + 'url': subtitle, + } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} + + return { + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'average_rating': average_rating, + 'categories': categories, + 'age_limit': 18, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/pornoxo.py b/yt_dlp/extractor/pornoxo.py new file mode 100644 index 0000000..049feb4 --- /dev/null +++ b/yt_dlp/extractor/pornoxo.py 
@@ -0,0 +1,55 @@ +from .common import InfoExtractor +from ..utils import ( + str_to_int, +) + + +class PornoXOIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' + _TEST = { + 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', + 'md5': '582f28ecbaa9e6e24cb90f50f524ce87', + 'info_dict': { + 'id': '7564', + 'ext': 'flv', + 'title': 'Striptease From Sexy Secretary!', + 'display_id': 'striptease-from-sexy-secretary', + 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', + 'categories': list, # NSFW + 'thumbnail': r're:https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id, display_id = mobj.groups() + + webpage = self._download_webpage(url, video_id) + video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) + + title = self._html_search_regex( + r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title') + + view_count = str_to_int(self._html_search_regex( + r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) + + categories_str = self._html_search_regex( + r'<meta name="description" content=".*featuring\s*([^"]+)"', + webpage, 'categories', fatal=False) + categories = ( + None if categories_str is None + else categories_str.split(',')) + + video_data.update({ + 'id': video_id, + 'title': title, + 'display_id': display_id, + 'description': self._html_search_meta('description', webpage), + 'categories': categories, + 'view_count': view_count, + 'age_limit': 18, + }) + + return video_data diff --git a/yt_dlp/extractor/pr0gramm.py b/yt_dlp/extractor/pr0gramm.py new file mode 100644 index 0000000..66f8a5f --- /dev/null +++ b/yt_dlp/extractor/pr0gramm.py @@ -0,0 +1,201 @@ +import json +from urllib.parse import unquote + +from .common import InfoExtractor +from ..compat import functools +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + make_archive_id, + mimetype2ext, + str_or_none, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class Pr0grammIE(InfoExtractor): + _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)' + _TESTS = [{ + 'url': 'https://pr0gramm.com/new/video/5466437', + 'info_dict': { + 'id': '5466437', + 'ext': 'mp4', + 'title': 'pr0gramm-5466437 by g11st', + 'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'], + 'uploader': 'g11st', + 'uploader_id': '394718', + 'timestamp': 1671590240, + 'upload_date': '20221221', + 'like_count': int, + 'dislike_count': int, + 'age_limit': 0, + 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', + '_old_archive_ids': ['pr0grammstatic 5466437'], + }, + }, { + 'url': 'https://pr0gramm.com/new/3052805:comment28391322', + 'info_dict': { + 'id': '3052805', + 'ext': 'mp4', + 'title': 'pr0gramm-3052805 by Hansking1', + 'tags': 'count:15', + 'uploader': 'Hansking1', + 'uploader_id': '385563', + 'timestamp': 1552930408, + 'upload_date': '20190318', + 'like_count': int, + 'dislike_count': int, + 'age_limit': 0, + 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', + '_old_archive_ids': ['pr0grammstatic 3052805'], + }, + }, { + # Requires verified account + 'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332', + 'info_dict': { + 'id': '5848332', + 'ext': 'mp4', + 'title': 'pr0gramm-5848332 by erd0pfel', + 'tags': 'count:18', + 'uploader': 'erd0pfel', + 'uploader_id': '349094', + 
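+            # This test item is age-gated; the API only returns it when the
+            # right bits are set in the 'flags' query (see _maximum_flags
+            # below, msb-first pol|nsfp|nsfl|nsfw|sfw): an anonymous session
+            # sends 0b10001 (17), a logged-in verified account 0b11111 (31).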
'timestamp': 1694489652, + 'upload_date': '20230912', + 'like_count': int, + 'dislike_count': int, + 'age_limit': 18, + 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', + '_old_archive_ids': ['pr0grammstatic 5848332'], + }, + }, { + 'url': 'https://pr0gramm.com/top/5895149', + 'info_dict': { + 'id': '5895149', + 'ext': 'mp4', + 'title': 'pr0gramm-5895149 by algoholigSeeManThrower', + 'tags': 'count:19', + 'uploader': 'algoholigSeeManThrower', + 'uploader_id': '457556', + 'timestamp': 1697580902, + 'upload_date': '20231018', + 'like_count': int, + 'dislike_count': int, + 'age_limit': 0, + 'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg', + '_old_archive_ids': ['pr0grammstatic 5895149'], + }, + }, { + 'url': 'https://pr0gramm.com/static/5466437', + 'only_matching': True, + }, { + 'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805', + 'only_matching': True, + }, { + 'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290', + 'only_matching': True, + }] + + BASE_URL = 'https://pr0gramm.com' + + @functools.cached_property + def _is_logged_in(self): + return 'pp' in self._get_cookies(self.BASE_URL) + + @functools.cached_property + def _maximum_flags(self): + # We need to guess the flags for the content otherwise the api will raise an error + # We can guess the maximum allowed flags for the account from the cookies + # Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw + flags = 0b10001 + if self._is_logged_in: + flags |= 0b01000 + cookies = self._get_cookies(self.BASE_URL) + if 'me' not in cookies: + self._download_webpage(self.BASE_URL, None, 'Refreshing verification information') + if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')): + flags |= 0b00110 + + return flags + + def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'): + data = self._download_json( + f'https://pr0gramm.com/api/items/{endpoint}', + video_id, note, query=query, expected_status=403) + + error = traverse_obj(data, ('error', {str})) + if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'): + if not self._is_logged_in: + self.raise_login_required() + raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True) + elif error: + message = traverse_obj(data, ('msg', {str})) or error + raise ExtractorError(f'API returned error: {message}', expected=True) + + return data + + @staticmethod + def _create_source_url(path): + return urljoin('https://img.pr0gramm.com', path) + + def _real_extract(self, url): + video_id = self._match_id(url) + video_info = traverse_obj( + self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}), + ('items', 0, {dict})) + + source = video_info.get('image') + if not source or not source.endswith('mp4'): + self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id) + + metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags') + tags = traverse_obj(metadata, ('tags', ..., 'tag', {str})) + # Sorted by "confidence", higher confidence = earlier in list + confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float}))) + if confidences: + tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)] + + formats = traverse_obj(video_info, ('variants', ..., { + 'format_id': ('name', {str}), + 'url': ('path', {self._create_source_url}), + 'ext': ('mimeType', {mimetype2ext}), + 'vcodec': ('codec', {str}), + 
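+            # Each entry in this mapping is a (path, {transform}) spec for
+            # traverse_obj: the callable in braces is applied to the matched
+            # value, e.g. (illustrative input only)
+            # traverse_obj({'width': '1280'}, ('width', {int_or_none})) == 1280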
'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'bitrate': ('bitRate', {float_or_none}), + 'filesize': ('fileSize', {int_or_none}), + })) if video_info.get('variants') else [{ + 'ext': 'mp4', + 'format_id': 'source', + **traverse_obj(video_info, { + 'url': ('image', {self._create_source_url}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + }] + + subtitles = {} + for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])): + subtitles.setdefault(subtitle['language'], []).append(traverse_obj(subtitle, { + 'url': ('path', {self._create_source_url}), + 'note': ('label', {str}), + })) + + return { + 'id': video_id, + 'title': f'pr0gramm-{video_id} by {video_info.get("user")}', + 'tags': tags, + 'formats': formats, + 'subtitles': subtitles, + 'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0, + '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)], + **traverse_obj(video_info, { + 'uploader': ('user', {str}), + 'uploader_id': ('userId', {str_or_none}), + 'like_count': ('up', {int}), + 'dislike_count': ('down', {int}), + 'timestamp': ('created', {int}), + 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}) + }), + } diff --git a/yt_dlp/extractor/prankcast.py b/yt_dlp/extractor/prankcast.py new file mode 100644 index 0000000..56cd40d --- /dev/null +++ b/yt_dlp/extractor/prankcast.py @@ -0,0 +1,137 @@ +import json + +from .common import InfoExtractor +from ..utils import float_or_none, parse_iso8601, str_or_none, try_call +from ..utils.traversal import traverse_obj + + +class PrankCastIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/showreel/(?P<id>\d+)-(?P<display_id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://prankcast.com/Devonanustart/showreel/1561-Beverly-is-back-like-a-heart-attack-', + 'info_dict': { + 'id': '1561', + 'ext': 'mp3', + 'title': 'Beverly is back like a heart attack!', + 'display_id': 'Beverly-is-back-like-a-heart-attack-', + 'timestamp': 1661391575, + 'uploader': 'Devonanustart', + 'channel_id': '4', + 'duration': 7918, + 'cast': ['Devonanustart', 'Phonelosers'], + 'description': '', + 'categories': ['prank'], + 'tags': ['prank call', 'prank', 'live show'], + 'upload_date': '20220825' + } + }, { + 'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL', + 'info_dict': { + 'id': '2048', + 'ext': 'mp3', + 'title': 'NOT COOL', + 'display_id': 'NOT-COOL', + 'timestamp': 1665028364, + 'uploader': 'phonelosers', + 'channel_id': '6', + 'duration': 4044, + 'cast': ['phonelosers'], + 'description': '', + 'categories': ['prank'], + 'tags': ['prank call', 'prank', 'live show'], + 'upload_date': '20221006' + } + }] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + + webpage = self._download_webpage(url, video_id) + json_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_showreel'] + + uploader = json_info.get('user_name') + guests_json = self._parse_json(json_info.get('guests_json') or '{}', video_id) + start_date = parse_iso8601(json_info.get('start_date')) + + return { + 'id': video_id, + 'title': json_info.get('broadcast_title') or self._og_search_title(webpage), + 'display_id': display_id, + 'url': f'{json_info["broadcast_url"]}{json_info["recording_hash"]}.mp3', + 'timestamp': start_date, + 'uploader': uploader, + 'channel_id': str_or_none(json_info.get('user_id')), + 'duration': try_call(lambda: 
parse_iso8601(json_info['end_date']) - start_date),
+            'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
+            'description': json_info.get('broadcast_description'),
+            'categories': [json_info.get('broadcast_category')],
+            'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
+        }
+
+
+class PrankCastPostIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
+        'info_dict': {
+            'id': '6214',
+            'ext': 'mp3',
+            'title': 'Happy National Rachel Day!',
+            'display_id': 'happy-national-rachel-day-',
+            'timestamp': 1704333938,
+            'uploader': 'Devonanustart',
+            'channel_id': '4',
+            'duration': 13175,
+            'cast': ['Devonanustart'],
+            'description': '',
+            'categories': ['prank call'],
+            'upload_date': '20240104'
+        }
+    }, {
+        'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
+        'info_dict': {
+            'id': '6217',
+            'ext': 'mp3',
+            'title': 'Jake the Work Crow!',
+            'display_id': 'jake-the-work-crow-',
+            'timestamp': 1704346592,
+            'uploader': 'despicabledogs',
+            'channel_id': '957',
+            'duration': 263.287,
+            'cast': ['despicabledogs'],
+            'description': 'https://imgur.com/a/vtxLvKU',
+            'categories': [],
+            'upload_date': '20240104'
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+
+        webpage = self._download_webpage(url, video_id)
+        post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
+        content = self._parse_json(post['post_contents_json'], video_id)[0]
+
+        uploader = post.get('user_name')
+        guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}
+
+        return {
+            'id': video_id,
+            'title': post.get('post_title') or self._og_search_title(webpage),
+            'display_id': display_id,
+            'url': content.get('url'),
+            'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
+            'uploader': uploader,
+            'channel_id': str_or_none(post.get('user_id')),
+            'duration': float_or_none(content.get('duration')),
+            'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
+            'description': post.get('post_body'),
+            'categories': list(filter(None, [content.get('category')])),
+            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
+            'subtitles': {
+                'live_chat': [{
+                    'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
+                    'ext': 'json',
+                }],
+            } if post.get('content_id') else None
+        }
diff --git a/yt_dlp/extractor/premiershiprugby.py b/yt_dlp/extractor/premiershiprugby.py
new file mode 100644
index 0000000..67d41fd
--- /dev/null
+++ b/yt_dlp/extractor/premiershiprugby.py
@@ -0,0 +1,39 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PremiershipRugbyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:\w+\.)premiershiprugby\.(?:com)/watch/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons',
+        'info_dict': {
+            'id': '0_mbkb7ldt',
+            'title': 'Full Match: Harlequins v Newcastle Falcons',
+            'ext': 'mp4',
+            'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75',
+            'duration': 6093.0,
+            'tags': ['video'],
+            'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher 
premiership'], + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + json_data = self._download_json( + f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}', + display_id, query={'clientId': 'PRL'})['data']['article'] + + formats, subs = self._extract_m3u8_formats_and_subtitles( + json_data['heroMedia']['content']['videoLink'], display_id) + + return { + 'id': json_data['heroMedia']['content']['sourceSystemId'], + 'display_id': display_id, + 'title': traverse_obj(json_data, ('heroMedia', 'title')), + 'formats': formats, + 'subtitles': subs, + 'thumbnail': traverse_obj(json_data, ('heroMedia', 'content', 'videoThumbnail')), + 'duration': int_or_none(traverse_obj(json_data, ('heroMedia', 'content', 'metadata', 'msDuration')), scale=1000), + 'tags': json_data.get('tags'), + 'categories': traverse_obj(json_data, ('categories', ..., 'text')), + } diff --git a/yt_dlp/extractor/presstv.py b/yt_dlp/extractor/presstv.py new file mode 100644 index 0000000..26ce74a --- /dev/null +++ b/yt_dlp/extractor/presstv.py @@ -0,0 +1,69 @@ +from .common import InfoExtractor +from ..utils import remove_start + + +class PressTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?' + + _TEST = { + 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/', + 'md5': '5d7e3195a447cb13e9267e931d8dd5a5', + 'info_dict': { + 'id': '459911', + 'display_id': 'Australian-sewerage-treatment-facility-', + 'ext': 'mp4', + 'title': 'Organic mattresses used to clean waste water', + 'upload_date': '20160409', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:20002e654bbafb6908395a5c0cfcd125' + } + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + # extract video URL from webpage + video_url = self._hidden_inputs(webpage)['inpPlayback'] + + # build list of available formats + # specified in http://www.presstv.ir/Scripts/playback.js + base_url = 'http://192.99.219.222:82/presstv' + _formats = [ + (180, '_low200.mp4'), + (360, '_low400.mp4'), + (720, '_low800.mp4'), + (1080, '.mp4') + ] + + formats = [{ + 'url': base_url + video_url[:-4] + extension, + 'format_id': '%dp' % height, + 'height': height, + } for height, extension in _formats] + + # extract video metadata + title = remove_start( + self._html_search_meta('title', webpage, fatal=True), 'PressTV-') + + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) + + upload_date = '%04d%02d%02d' % ( + int(mobj.group('y')), + int(mobj.group('m')), + int(mobj.group('d')), + ) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'description': description + } diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py new file mode 100644 index 0000000..daf1405 --- /dev/null +++ b/yt_dlp/extractor/projectveritas.py @@ -0,0 +1,52 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, + unified_strdate, +) + + +class ProjectVeritasIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 
'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/', + 'info_dict': { + 'id': '51910aab-365a-5cf1-88f2-8eb1ca5fd3c6', + 'ext': 'mp4', + 'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus', + 'upload_date': '20200327', + 'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c', + } + }, { + 'url': 'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/', + 'info_dict': { + 'id': 'c5aab304-a56b-54b1-9f0b-03b77bc5f2f6', + 'ext': 'mp4', + 'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots', + 'upload_date': '20200927', + 'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4', + } + }] + + def _real_extract(self, url): + id, type = self._match_valid_url(url).group('id', 'type') + api_url = f'https://www.projectveritas.com/page-data/{type}/{id}/page-data.json' + data_json = self._download_json(api_url, id)['result']['data'] + main_data = traverse_obj(data_json, 'video', 'post') + video_id = main_data['id'] + thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src')) + mux_asset = traverse_obj(main_data, + 'muxAsset', ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'), + get_all=False, expected_type=dict) + if not mux_asset: + raise ExtractorError('No video on the provided url.', expected=True) + playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId')) + formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id) + return { + 'id': video_id, + 'title': main_data['title'], + 'upload_date': unified_strdate(main_data.get('date')), + 'thumbnail': thumbnail.replace('//', ''), + 'formats': formats, + } diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py new file mode 100644 index 0000000..46e2e8a --- /dev/null +++ b/yt_dlp/extractor/prosiebensat1.py @@ -0,0 +1,496 @@ +import re + +from hashlib import sha1 +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + determine_ext, + float_or_none, + int_or_none, + merge_dicts, + unified_strdate, +) + + +class ProSiebenSat1BaseIE(InfoExtractor): + _GEO_BYPASS = False + _ACCESS_ID = None + _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' + _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' + + def _extract_video_info(self, url, clip_id): + client_location = url + + video = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos', + clip_id, 'Downloading videos JSON', query={ + 'access_token': self._TOKEN, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'ids': clip_id, + })[0] + + if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True: + self.report_drm(clip_id) + + formats = [] + if self._ACCESS_ID: + raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID + protocols = self._download_json( + self._V4_BASE_URL + 'protocols', clip_id, + 'Downloading protocols JSON', + headers=self.geo_verification_headers(), query={ + 'access_id': self._ACCESS_ID, + 'client_token': sha1((raw_ct).encode()).hexdigest(), + 'video_id': clip_id, + }, fatal=False, expected_status=(403,)) or {} + error = protocols.get('error') or {} + if error.get('title') == 'Geo check failed': + self.raise_geo_restricted(countries=['AT', 'CH', 'DE']) + server_token = protocols.get('server_token') + if server_token: + urls = 
(self._download_json( + self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ + 'access_id': self._ACCESS_ID, + 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(), + 'protocols': self._SUPPORTED_PROTOCOLS, + 'server_token': server_token, + 'video_id': clip_id, + }, fatal=False) or {}).get('urls') or {} + for protocol, variant in urls.items(): + source_url = variant.get('clear', {}).get('url') + if not source_url: + continue + if protocol == 'dash': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id=protocol, fatal=False)) + elif protocol == 'hls': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id=protocol, fatal=False)) + else: + formats.append({ + 'url': source_url, + 'format_id': protocol, + }) + if not formats: + source_ids = [compat_str(source['id']) for source in video['sources']] + + client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + + sources = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + clip_id, 'Downloading sources JSON', query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + }) + server_id = sources['server_id'] + + def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None + return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate + + for source_id in source_ids: + client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + urls = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + clip_id, 'Downloading urls JSON', fatal=False, query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'server_id': server_id, + 'source_ids': source_id, + }) + if not urls: + continue + if urls.get('status_code') != 0: + raise ExtractorError('This video is unavailable', expected=True) + urls_sources = urls['sources'] + if isinstance(urls_sources, dict): + urls_sources = urls_sources.values() + for source in urls_sources: + source_url = source.get('url') + if not source_url: + continue + protocol = source.get('protocol') + mimetype = source.get('mimetype') + if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, clip_id, f4m_id='hds', fatal=False)) + elif mimetype == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif mimetype == 'application/dash+xml': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id='dash', fatal=False)) + else: + tbr = fix_bitrate(source['bitrate']) + if protocol in ('rtmp', 'rtmpe'): + mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) + if not mobj: + continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] + formats.append({ + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, + 'player_url': 
'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', + 'page_url': 'http://www.prosieben.de', + 'tbr': tbr, + 'ext': 'flv', + 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + }) + else: + formats.append({ + 'url': source_url, + 'tbr': tbr, + 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + }) + + return { + 'duration': float_or_none(video.get('duration')), + 'formats': formats, + } + + +class ProSiebenSat1IE(ProSiebenSat1BaseIE): + IE_NAME = 'prosiebensat1' + IE_DESC = 'ProSiebenSat.1 Digital' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + (?:beta\.)? + (?: + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia + )\.(?:de|at|ch)| + ran\.de|fem\.com|advopedia\.de|galileo\.tv/video + ) + /(?P<id>.+) + ''' + + _TESTS = [ + { + # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242 + # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215: + # - malformed f4m manifest support + # - proper handling of URLs starting with `https?://` in 2.0 manifests + # - recursive child f4m manifests extraction + 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', + 'info_dict': { + 'id': '2104602', + 'ext': 'mp4', + 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2', + 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', + 'upload_date': '20131231', + 'duration': 5845.04, + 'series': 'CIRCUS HALLIGALLI', + 'season_number': 2, + 'episode': 'Episode 18 - Staffel 2', + 'episode_number': 18, + }, + }, + { + 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', + 'info_dict': { + 'id': '2570327', + 'ext': 'mp4', + 'title': 'Lady-Umstyling für Audrina', + 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d', + 'upload_date': '20131014', + 'duration': 606.76, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Seems to be broken', + }, + { + 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge', + 'info_dict': { + 'id': '2429369', + 'ext': 'mp4', + 'title': 'Countdown für die Autowerkstatt', + 'description': 'md5:809fc051a457b5d8666013bc40698817', + 'upload_date': '20140223', + 'duration': 2595.04, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', + 'info_dict': { + 'id': '2904997', + 'ext': 'mp4', + 'title': 'Sexy laufen in Ugg Boots', + 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6', + 'upload_date': '20140122', + 'duration': 245.32, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', + 'info_dict': { + 'id': '2906572', + 'ext': 'mp4', + 'title': 'Im Interview: Kai Wiesinger', + 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', + 'upload_date': '20140203', + 'duration': 522.56, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', + 'info_dict': { + 'id': '2992323', + 'ext': 'mp4', + 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', + 'description': 
'md5:2669cde3febe9bce13904f701e774eb6', + 'upload_date': '20141014', + 'duration': 2410.44, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', + 'info_dict': { + 'id': '3004256', + 'ext': 'mp4', + 'title': 'Schalke: Tönnies möchte Raul zurück', + 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f', + 'upload_date': '20140226', + 'duration': 228.96, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', + 'info_dict': { + 'id': '2572814', + 'ext': 'mp4', + 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man', + 'description': 'md5:6ddb02b0781c6adf778afea606652e38', + 'timestamp': 1382041620, + 'upload_date': '20131017', + 'duration': 469.88, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag', + 'info_dict': { + 'id': '2156342', + 'ext': 'mp4', + 'title': 'Kurztrips zum Valentinstag', + 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.', + 'duration': 307.24, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', + 'info_dict': { + 'id': '439664', + 'title': 'Episode 8 - Ganze Folge - Playlist', + 'description': 'md5:63b8963e71f481782aeea877658dec84', + }, + 'playlist_count': 2, + 'skip': 'This video is unavailable', + }, + { + # title in <h2 class="subtitle"> + 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', + 'info_dict': { + 'id': '4895826', + 'ext': 'mp4', + 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe', + 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9', + 'upload_date': '20170302', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'geo restricted to Germany', + }, + { + # geo restricted to Germany + 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', + 'only_matching': True, + }, + { + # geo restricted to Germany + 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', + 'only_matching': True, + }, + { + # geo restricted to Germany + 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden', + 'only_matching': True, + }, + { + 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', + 'only_matching': True, + }, + { + 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage', + 'only_matching': True, + }, + ] + + _TOKEN = 'prosieben' + _SALT = '01!8d8F_)r9]4s[qeuXfP%' + _CLIENT_NAME = 'kolibri-2.0.19-splec4' + + _ACCESS_ID = 'x_prosiebenmaxx-de' + _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag' + _IV = 'Aeluchoc6aevechuipiexeeboowedaok' + + _CLIPID_REGEXES = [ + r'"clip_id"\s*:\s+"(\d+)"', + r'clipid: "(\d+)"', + r'clip[iI]d=(\d+)', + r'clip[iI][dD]\s*=\s*["\'](\d+)', + r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", + r'proMamsId"\s*:\s*"(\d+)', + r'proMamsId"\s*:\s*"(\d+)', + ] + _TITLE_REGEXES = [ + r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', + 
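+        # _html_search_regex accepts a list of patterns and tries them in
+        # order, returning the first match, so the more specific markup
+        # variants are listed before the generic fallbacks.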
r'<header class="clearfix">\s*<h3>(.+?)</h3>', + r'<!-- start video -->\s*<h1>(.+?)</h1>', + r'<h1 class="att-name">\s*(.+?)</h1>', + r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>', + r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>', + r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>', + r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>', + ] + _DESCRIPTION_REGEXES = [ + r'<p itemprop="description">\s*(.+?)</p>', + r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', + r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', + r'<p class="att-description">\s*(.+?)\s*</p>', + r'<p class="video-description" itemprop="description">\s*(.+?)</p>', + r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>', + ] + _UPLOAD_DATE_REGEXES = [ + r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"', + r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr', + r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', + r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', + ] + _PAGE_TYPE_REGEXES = [ + r'<meta name="page_type" content="([^"]+)">', + r"'itemType'\s*:\s*'([^']*)'", + ] + _PLAYLIST_ID_REGEXES = [ + r'content[iI]d=(\d+)', + r"'itemId'\s*:\s*'([^']*)'", + ] + _PLAYLIST_CLIP_REGEXES = [ + r'(?s)data-qvt=.+?<a href="([^"]+)"', + ] + + def _extract_clip(self, url, webpage): + clip_id = self._html_search_regex( + self._CLIPID_REGEXES, webpage, 'clip id') + title = self._html_search_regex( + self._TITLE_REGEXES, webpage, 'title', + default=None) or self._og_search_title(webpage) + info = self._extract_video_info(url, clip_id) + description = self._html_search_regex( + self._DESCRIPTION_REGEXES, webpage, 'description', default=None) + if description is None: + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + upload_date = unified_strdate( + self._html_search_meta('og:published_time', webpage, + 'upload date', default=None) + or self._html_search_regex(self._UPLOAD_DATE_REGEXES, + webpage, 'upload date', default=None)) + + json_ld = self._search_json_ld(webpage, clip_id, default={}) + + return merge_dicts(info, { + 'id': clip_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + }, json_ld) + + def _extract_playlist(self, url, webpage): + playlist_id = self._html_search_regex( + self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') + playlist = self._parse_json( + self._search_regex( + r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', + webpage, 'playlist'), + playlist_id) + entries = [] + for item in playlist: + clip_id = item.get('id') or item.get('upc') + if not clip_id: + continue + info = self._extract_video_info(url, clip_id) + info.update({ + 'id': clip_id, + 'title': item.get('title') or item.get('teaser', {}).get('headline'), + 'description': item.get('teaser', {}).get('description'), + 'thumbnail': item.get('poster'), + 'duration': float_or_none(item.get('duration')), + 'series': item.get('tvShowTitle'), + 'uploader': item.get('broadcastPublisher'), + }) + entries.append(info) + return self.playlist_result(entries, playlist_id) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + page_type = self._search_regex( + self._PAGE_TYPE_REGEXES, webpage, + 'page type', default='clip').lower() + if page_type == 'clip': + return self._extract_clip(url, webpage) + elif page_type == 'playlist': + 
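+            # playlist pages embed a `var contentResources = [...]` JSON
+            # array; _extract_playlist parses it and resolves each clip id
+            # through the same videos API used for single clips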
return self._extract_playlist(url, webpage) + else: + raise ExtractorError( + 'Unsupported page type %s' % page_type, expected=True) diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py new file mode 100644 index 0000000..5bb1832 --- /dev/null +++ b/yt_dlp/extractor/prx.py @@ -0,0 +1,428 @@ +import itertools +from .common import InfoExtractor, SearchInfoExtractor +from ..utils import ( + urljoin, + traverse_obj, + int_or_none, + mimetype2ext, + clean_html, + url_or_none, + unified_timestamp, + str_or_none, +) + + +class PRXBaseIE(InfoExtractor): + PRX_BASE_URL_RE = r'https?://(?:(?:beta|listen)\.)?prx.org/%s' + + def _call_api(self, item_id, path, query=None, fatal=True, note='Downloading CMS API JSON'): + return self._download_json( + urljoin('https://cms.prx.org/api/v1/', path), item_id, query=query, fatal=fatal, note=note) + + @staticmethod + def _get_prx_embed_response(response, section): + return traverse_obj(response, ('_embedded', f'prx:{section}')) + + @staticmethod + def _extract_file_link(response): + return url_or_none(traverse_obj( + response, ('_links', 'enclosure', 'href'), expected_type=str)) + + @classmethod + def _extract_image(cls, image_response): + if not isinstance(image_response, dict): + return + return { + 'id': str_or_none(image_response.get('id')), + 'filesize': image_response.get('size'), + 'width': image_response.get('width'), + 'height': image_response.get('height'), + 'url': cls._extract_file_link(image_response) + } + + @classmethod + def _extract_base_info(cls, response): + if not isinstance(response, dict): + return + item_id = str_or_none(response.get('id')) + if not item_id: + return + thumbnail_dict = cls._extract_image(cls._get_prx_embed_response(response, 'image')) + description = ( + clean_html(response.get('description')) + or response.get('shortDescription')) + return { + 'id': item_id, + 'title': response.get('title') or item_id, + 'thumbnails': [thumbnail_dict] if thumbnail_dict else None, + 'description': description, + 'release_timestamp': unified_timestamp(response.get('releasedAt')), + 'timestamp': unified_timestamp(response.get('createdAt')), + 'modified_timestamp': unified_timestamp(response.get('updatedAt')), + 'duration': int_or_none(response.get('duration')), + 'tags': response.get('tags'), + 'episode_number': int_or_none(response.get('episodeIdentifier')), + 'season_number': int_or_none(response.get('seasonIdentifier')) + } + + @classmethod + def _extract_series_info(cls, series_response): + base_info = cls._extract_base_info(series_response) + if not base_info: + return + account_info = cls._extract_account_info( + cls._get_prx_embed_response(series_response, 'account')) or {} + return { + **base_info, + 'channel_id': account_info.get('channel_id'), + 'channel_url': account_info.get('channel_url'), + 'channel': account_info.get('channel'), + 'series': base_info.get('title'), + 'series_id': base_info.get('id'), + } + + @classmethod + def _extract_account_info(cls, account_response): + base_info = cls._extract_base_info(account_response) + if not base_info: + return + name = account_response.get('name') + return { + **base_info, + 'title': name, + 'channel_id': base_info.get('id'), + 'channel_url': 'https://beta.prx.org/accounts/%s' % base_info.get('id'), + 'channel': name, + } + + @classmethod + def _extract_story_info(cls, story_response): + base_info = cls._extract_base_info(story_response) + if not base_info: + return + series = cls._extract_series_info( + cls._get_prx_embed_response(story_response, 'series')) or {} + 
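+        # The CMS API is HAL-style: related resources are embedded under
+        # response['_embedded']['prx:<section>'], which
+        # _get_prx_embed_response unwraps, e.g. a response looks roughly
+        # like {'_embedded': {'prx:account': {'id': 206, 'name': '...'}}}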
account = cls._extract_account_info( + cls._get_prx_embed_response(story_response, 'account')) or {} + return { + **base_info, + 'series': series.get('series'), + 'series_id': series.get('series_id'), + 'channel_id': account.get('channel_id'), + 'channel_url': account.get('channel_url'), + 'channel': account.get('channel') + } + + def _entries(self, item_id, endpoint, entry_func, query=None): + """ + Extract entries from paginated list API + @param entry_func: Function to generate entry from response item + """ + total = 0 + for page in itertools.count(1): + response = self._call_api(f'{item_id}: page {page}', endpoint, query={ + **(query or {}), + 'page': page, + 'per': 100 + }) + items = self._get_prx_embed_response(response, 'items') + if not response or not items: + break + + yield from filter(None, map(entry_func, items)) + + total += response['count'] + if total >= response['total']: + break + + def _story_playlist_entry(self, response): + story = self._extract_story_info(response) + if not story: + return + story.update({ + '_type': 'url', + 'url': 'https://beta.prx.org/stories/%s' % story['id'], + 'ie_key': PRXStoryIE.ie_key() + }) + return story + + def _series_playlist_entry(self, response): + series = self._extract_series_info(response) + if not series: + return + series.update({ + '_type': 'url', + 'url': 'https://beta.prx.org/series/%s' % series['id'], + 'ie_key': PRXSeriesIE.ie_key() + }) + return series + + +class PRXStoryIE(PRXBaseIE): + _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'stories/(?P<id>\d+)' + + _TESTS = [ + { + # Story with season and episode details + 'url': 'https://beta.prx.org/stories/399200', + 'info_dict': { + 'id': '399200', + 'title': 'Fly Me To The Moon', + 'description': 'md5:43230168390b95d3322048d8a56bf2bb', + 'release_timestamp': 1640250000, + 'timestamp': 1640208972, + 'modified_timestamp': 1641318202, + 'duration': 1004, + 'tags': 'count:7', + 'episode_number': 8, + 'season_number': 5, + 'series': 'AirSpace', + 'series_id': '38057', + 'channel_id': '220986', + 'channel_url': 'https://beta.prx.org/accounts/220986', + 'channel': 'Air and Space Museum', + }, + 'playlist': [{ + 'info_dict': { + 'id': '399200_part1', + 'title': 'Fly Me To The Moon', + 'description': 'md5:43230168390b95d3322048d8a56bf2bb', + 'release_timestamp': 1640250000, + 'timestamp': 1640208972, + 'modified_timestamp': 1641318202, + 'duration': 530, + 'tags': 'count:7', + 'episode_number': 8, + 'season_number': 5, + 'series': 'AirSpace', + 'series_id': '38057', + 'channel_id': '220986', + 'channel_url': 'https://beta.prx.org/accounts/220986', + 'channel': 'Air and Space Museum', + 'ext': 'mp3', + 'upload_date': '20211222', + 'episode': 'Episode 8', + 'release_date': '20211223', + 'season': 'Season 5', + 'modified_date': '20220104' + } + }, { + 'info_dict': { + 'id': '399200_part2', + 'title': 'Fly Me To The Moon', + 'description': 'md5:43230168390b95d3322048d8a56bf2bb', + 'release_timestamp': 1640250000, + 'timestamp': 1640208972, + 'modified_timestamp': 1641318202, + 'duration': 474, + 'tags': 'count:7', + 'episode_number': 8, + 'season_number': 5, + 'series': 'AirSpace', + 'series_id': '38057', + 'channel_id': '220986', + 'channel_url': 'https://beta.prx.org/accounts/220986', + 'channel': 'Air and Space Museum', + 'ext': 'mp3', + 'upload_date': '20211222', + 'episode': 'Episode 8', + 'release_date': '20211223', + 'season': 'Season 5', + 'modified_date': '20220104' + } + } + + ] + }, { + # Story with only split audio + 'url': 'https://beta.prx.org/stories/326414', + 'info_dict': { + 
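+            # Stories whose audio exists only as multiple pieces come back as
+            # a multi_video playlist; each entry reuses this metadata with an
+            # id of the form '<story_id>_part<n>' (see _extract_story below)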
'id': '326414', + 'title': 'Massachusetts v EPA', + 'description': 'md5:744fffba08f19f4deab69fa8d49d5816', + 'timestamp': 1592509124, + 'modified_timestamp': 1592510457, + 'duration': 3088, + 'tags': 'count:0', + 'series': 'Outside/In', + 'series_id': '36252', + 'channel_id': '206', + 'channel_url': 'https://beta.prx.org/accounts/206', + 'channel': 'New Hampshire Public Radio', + }, + 'playlist_count': 4 + }, { + # Story with single combined audio + 'url': 'https://beta.prx.org/stories/400404', + 'info_dict': { + 'id': '400404', + 'title': 'Cafe Chill (Episode 2022-01)', + 'thumbnails': 'count:1', + 'description': 'md5:9f1b5a3cbd64fb159d08c3baa31f1539', + 'timestamp': 1641233952, + 'modified_timestamp': 1641234248, + 'duration': 3540, + 'series': 'Café Chill', + 'series_id': '37762', + 'channel_id': '5767', + 'channel_url': 'https://beta.prx.org/accounts/5767', + 'channel': 'C89.5 - KNHC Seattle', + 'ext': 'mp3', + 'tags': 'count:0', + 'thumbnail': r're:https?://cms\.prx\.org/pub/\w+/0/web/story_image/767965/medium/Aurora_Over_Trees\.jpg', + 'upload_date': '20220103', + 'modified_date': '20220103' + } + }, { + 'url': 'https://listen.prx.org/stories/399200', + 'only_matching': True + } + ] + + def _extract_audio_pieces(self, audio_response): + return [{ + 'format_id': str_or_none(piece_response.get('id')), + 'format_note': str_or_none(piece_response.get('label')), + 'filesize': int_or_none(piece_response.get('size')), + 'duration': int_or_none(piece_response.get('duration')), + 'ext': mimetype2ext(piece_response.get('contentType')), + 'asr': int_or_none(piece_response.get('frequency'), scale=1000), + 'abr': int_or_none(piece_response.get('bitRate')), + 'url': self._extract_file_link(piece_response), + 'vcodec': 'none' + } for piece_response in sorted( + self._get_prx_embed_response(audio_response, 'items') or [], + key=lambda p: int_or_none(p.get('position')))] + + def _extract_story(self, story_response): + info = self._extract_story_info(story_response) + if not info: + return + audio_pieces = self._extract_audio_pieces( + self._get_prx_embed_response(story_response, 'audio')) + if len(audio_pieces) == 1: + return { + 'formats': audio_pieces, + **info + } + + entries = [{ + **info, + 'id': '%s_part%d' % (info['id'], (idx + 1)), + 'formats': [fmt], + } for idx, fmt in enumerate(audio_pieces)] + return { + '_type': 'multi_video', + 'entries': entries, + **info + } + + def _real_extract(self, url): + story_id = self._match_id(url) + response = self._call_api(story_id, f'stories/{story_id}') + return self._extract_story(response) + + +class PRXSeriesIE(PRXBaseIE): + _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'series/(?P<id>\d+)' + _TESTS = [ + { + 'url': 'https://beta.prx.org/series/36252', + 'info_dict': { + 'id': '36252', + 'title': 'Outside/In', + 'thumbnails': 'count:1', + 'description': 'md5:a6bedc5f810777bcb09ab30ff9059114', + 'timestamp': 1470684964, + 'modified_timestamp': 1582308830, + 'channel_id': '206', + 'channel_url': 'https://beta.prx.org/accounts/206', + 'channel': 'New Hampshire Public Radio', + 'series': 'Outside/In', + 'series_id': '36252' + }, + 'playlist_mincount': 39 + }, { + # Blank series + 'url': 'https://beta.prx.org/series/25038', + 'info_dict': { + 'id': '25038', + 'title': '25038', + 'timestamp': 1207612800, + 'modified_timestamp': 1207612800, + 'channel_id': '206', + 'channel_url': 'https://beta.prx.org/accounts/206', + 'channel': 'New Hampshire Public Radio', + 'series': '25038', + 'series_id': '25038' + }, + 'playlist_count': 0 + } + ] + + def 
_extract_series(self, series_response): + info = self._extract_series_info(series_response) + return { + '_type': 'playlist', + 'entries': self._entries(info['id'], 'series/%s/stories' % info['id'], self._story_playlist_entry), + **info + } + + def _real_extract(self, url): + series_id = self._match_id(url) + response = self._call_api(series_id, f'series/{series_id}') + return self._extract_series(response) + + +class PRXAccountIE(PRXBaseIE): + _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'accounts/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://beta.prx.org/accounts/206', + 'info_dict': { + 'id': '206', + 'title': 'New Hampshire Public Radio', + 'description': 'md5:277f2395301d0aca563c80c70a18ee0a', + 'channel_id': '206', + 'channel_url': 'https://beta.prx.org/accounts/206', + 'channel': 'New Hampshire Public Radio', + 'thumbnails': 'count:1' + }, + 'playlist_mincount': 380 + }] + + def _extract_account(self, account_response): + info = self._extract_account_info(account_response) + series = self._entries( + info['id'], f'accounts/{info["id"]}/series', self._series_playlist_entry) + stories = self._entries( + info['id'], f'accounts/{info["id"]}/stories', self._story_playlist_entry) + return { + '_type': 'playlist', + 'entries': itertools.chain(series, stories), + **info + } + + def _real_extract(self, url): + account_id = self._match_id(url) + response = self._call_api(account_id, f'accounts/{account_id}') + return self._extract_account(response) + + +class PRXStoriesSearchIE(PRXBaseIE, SearchInfoExtractor): + IE_DESC = 'PRX Stories Search' + IE_NAME = 'prxstories:search' + _SEARCH_KEY = 'prxstories' + + def _search_results(self, query): + yield from self._entries( + f'query {query}', 'stories/search', self._story_playlist_entry, query={'q': query}) + + +class PRXSeriesSearchIE(PRXBaseIE, SearchInfoExtractor): + IE_DESC = 'PRX Series Search' + IE_NAME = 'prxseries:search' + _SEARCH_KEY = 'prxseries' + + def _search_results(self, query): + yield from self._entries( + f'query {query}', 'series/search', self._series_playlist_entry, query={'q': query}) diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py new file mode 100644 index 0000000..4b8e5e9 --- /dev/null +++ b/yt_dlp/extractor/puhutv.py @@ -0,0 +1,233 @@ +from .common import InfoExtractor +from ..compat import compat_str +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + float_or_none, + parse_resolution, + str_or_none, + try_get, + unified_timestamp, + url_or_none, + urljoin, +) + + +class PuhuTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle' + IE_NAME = 'puhutv' + _TESTS = [{ + # film + 'url': 'https://puhutv.com/sut-kardesler-izle', + 'md5': 'a347470371d56e1585d1b2c8dab01c96', + 'info_dict': { + 'id': '5085', + 'display_id': 'sut-kardesler', + 'ext': 'mp4', + 'title': 'Süt Kardeşler', + 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4832.44, + 'creator': 'Arzu Film', + 'timestamp': 1561062602, + 'upload_date': '20190620', + 'release_year': 1976, + 'view_count': int, + 'tags': list, + }, + }, { + # episode, geo restricted, bypassable with --geo-verification-proxy + 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', + 'only_matching': True, + }, { + # 4k, with subtitles + 'url': 'https://puhutv.com/dip-1-bolum-izle', + 'only_matching': True, + }] + _SUBTITLE_LANGS = { + 'English': 'en', + 'Deutsch': 'de', + 'عربى': 'ar' + } + + def _real_extract(self, url): + 
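+        # Two-step lookup: the slug API maps the display id to a numeric
+        # asset id, then /api/assets/<id>/videos lists the streams (HLS
+        # playlists plus progressive MP4s); an HTTP 403 from that endpoint
+        # indicates a geo block and is surfaced via raise_geo_restricted()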
display_id = self._match_id(url) + + info = self._download_json( + urljoin(url, '/api/slug/%s-izle' % display_id), + display_id)['data'] + + video_id = compat_str(info['id']) + show = info.get('title') or {} + title = info.get('name') or show['name'] + if info.get('display_name'): + title = '%s %s' % (title, info['display_name']) + + try: + videos = self._download_json( + 'https://puhutv.com/api/assets/%s/videos' % video_id, + display_id, 'Downloading video JSON', + headers=self.geo_verification_headers()) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + self.raise_geo_restricted() + raise + + urls = [] + formats = [] + + for video in videos['data']['videos']: + media_url = url_or_none(video.get('url')) + if not media_url or media_url in urls: + continue + urls.append(media_url) + + playlist = video.get('is_playlist') + if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url: + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue + + quality = int_or_none(video.get('quality')) + f = { + 'url': media_url, + 'ext': 'mp4', + 'height': quality + } + video_format = video.get('video_format') + is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False + if is_hls: + format_id = 'hls' + f['protocol'] = 'm3u8_native' + elif video_format == 'mp4': + format_id = 'http' + else: + continue + if quality: + format_id += '-%sp' % quality + f['format_id'] = format_id + formats.append(f) + + creator = try_get( + show, lambda x: x['producer']['name'], compat_str) + + content = info.get('content') or {} + + images = try_get( + content, lambda x: x['images']['wide'], dict) or {} + thumbnails = [] + for image_id, image_url in images.items(): + if not isinstance(image_url, compat_str): + continue + if not image_url.startswith(('http', '//')): + image_url = 'https://%s' % image_url + t = parse_resolution(image_id) + t.update({ + 'id': image_id, + 'url': image_url + }) + thumbnails.append(t) + + tags = [] + for genre in show.get('genres') or []: + if not isinstance(genre, dict): + continue + genre_name = genre.get('name') + if genre_name and isinstance(genre_name, compat_str): + tags.append(genre_name) + + subtitles = {} + for subtitle in content.get('subtitles') or []: + if not isinstance(subtitle, dict): + continue + lang = subtitle.get('language') + sub_url = url_or_none(subtitle.get('url') or subtitle.get('file')) + if not lang or not isinstance(lang, compat_str) or not sub_url: + continue + subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ + 'url': sub_url + }] + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': info.get('description') or show.get('description'), + 'season_id': str_or_none(info.get('season_id')), + 'season_number': int_or_none(info.get('season_number')), + 'episode_number': int_or_none(info.get('episode_number')), + 'release_year': int_or_none(show.get('released_at')), + 'timestamp': unified_timestamp(info.get('created_at')), + 'creator': creator, + 'view_count': int_or_none(content.get('watch_count')), + 'duration': float_or_none(content.get('duration_in_ms'), 1000), + 'tags': tags, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + 'formats': formats + } + + +class PuhuTVSerieIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay' + IE_NAME = 'puhutv:serie' + _TESTS = [{ + 'url': 
'https://puhutv.com/deniz-yildizi-detay', + 'info_dict': { + 'title': 'Deniz Yıldızı', + 'id': 'deniz-yildizi', + }, + 'playlist_mincount': 205, + }, { + # a film detail page that uses the same URL format as serie pages + 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', + 'only_matching': True, + }] + + def _extract_entries(self, seasons): + for season in seasons: + season_id = season.get('id') + if not season_id: + continue + page = 1 + has_more = True + while has_more is True: + season = self._download_json( + 'https://galadriel.puhutv.com/seasons/%s' % season_id, + season_id, 'Downloading page %s' % page, query={ + 'page': page, + 'per': 40, + }) + episodes = season.get('episodes') + if isinstance(episodes, list): + for ep in episodes: + slug_path = str_or_none(ep.get('slugPath')) + if not slug_path: + continue + video_id = str_or_none(int_or_none(ep.get('id'))) + yield self.url_result( + 'https://puhutv.com/%s' % slug_path, + ie=PuhuTVIE.ie_key(), video_id=video_id, + video_title=ep.get('name') or ep.get('eventLabel')) + page += 1 + has_more = season.get('hasMore') + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + info = self._download_json( + urljoin(url, '/api/slug/%s-detay' % playlist_id), + playlist_id)['data'] + + seasons = info.get('seasons') + if seasons: + return self.playlist_result( + self._extract_entries(seasons), playlist_id, info.get('name')) + + # Films use the same URL format as series; resolve the slug to the film page + video_id = info.get('slug') or info['assets'][0]['slug'] + return self.url_result( + 'https://puhutv.com/%s-izle' % video_id, + PuhuTVIE.ie_key(), video_id) diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py new file mode 100644 index 0000000..38c5d11 --- /dev/null +++ b/yt_dlp/extractor/puls4.py @@ -0,0 +1,51 @@ +from .prosiebensat1 import ProSiebenSat1BaseIE +from ..compat import compat_str +from ..utils import parse_duration, unified_strdate + + +class Puls4IE(ProSiebenSat1BaseIE): + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)' + _TESTS = [{ + 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', + 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', + 'info_dict': { + 'id': '118118', + 'ext': 'flv', + 'title': 'Tobias Homberger von myclubs im #2min2miotalk', + 'description': 'md5:f9def7c5e8745d6026d8885487d91955', + 'upload_date': '20160830', + 'uploader': 'PULS_4', + }, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer', + 'only_matching': True, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598', + 'only_matching': True, + }] + _TOKEN = 'puls4' + _SALT = '01!kaNgaiNgah1Ie4AeSha' + _CLIENT_NAME = '' + + def _real_extract(self, url): + path = self._match_id(url) + content_path = self._download_json( + 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url'] + media = self._download_json( + 'http://www.puls4.com' + content_path, + content_path)['mediaCurrent'] + player_content = media['playerContent'] + info = self._extract_video_info(url, player_content['id']) + info.update({ + 'id': compat_str(media['objectId']), + 'title': player_content['title'], + 'description': media.get('description'), + 'thumbnail': media.get('previewLink'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(player_content.get('duration')), + 'episode':
player_content.get('episodePartName'), + 'show': media.get('channel'), + 'season_id': player_content.get('seasonId'), + 'uploader': player_content.get('sourceCompany'), + }) + return info diff --git a/yt_dlp/extractor/pyvideo.py b/yt_dlp/extractor/pyvideo.py new file mode 100644 index 0000000..7b25166 --- /dev/null +++ b/yt_dlp/extractor/pyvideo.py @@ -0,0 +1,70 @@ +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class PyvideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)' + + _TESTS = [{ + 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', + 'info_dict': { + 'id': 'become-a-logging-expert-in-30-minutes', + }, + 'playlist_count': 2, + }, { + 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', + 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', + 'info_dict': { + 'id': '2542', + 'ext': 'm4v', + 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v', + }, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + category = mobj.group('category') + video_id = mobj.group('id') + + entries = [] + + data = self._download_json( + 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' + % (category, video_id), video_id, fatal=False) + + if data: + for video in data['videos']: + video_url = video.get('url') + if video_url: + if video.get('type') == 'youtube': + entries.append(self.url_result(video_url, 'Youtube')) + else: + entries.append({ + 'id': compat_str(data.get('id') or video_id), + 'url': video_url, + 'title': data['title'], + 'description': data.get('description') or data.get('summary'), + 'thumbnail': data.get('thumbnail_url'), + 'duration': int_or_none(data.get('duration')), + }) + else: + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + media_urls = self._search_regex( + r'(?s)Media URL:(.+?)</li>', webpage, 'media urls') + for m in re.finditer( + r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls): + media_url = m.group('url') + if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url): + entries.append(self.url_result(media_url, 'Youtube')) + else: + entries.append({ + 'id': video_id, + 'url': media_url, + 'title': title, + }) + + return self.playlist_result(entries, video_id) diff --git a/yt_dlp/extractor/qdance.py b/yt_dlp/extractor/qdance.py new file mode 100644 index 0000000..934ebbf --- /dev/null +++ b/yt_dlp/extractor/qdance.py @@ -0,0 +1,171 @@ +import json +import time + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + jwt_decode_hs256, + str_or_none, + traverse_obj, + try_call, + url_or_none, +) + + +class QDanceIE(InfoExtractor): + _NETRC_MACHINE = 'qdance' + _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>[\w-]+)' + _TESTS = [{ + 'note': 'vod', + 'url': 'https://www.q-dance.com/network/library/146542138', + 'info_dict': { + 'id': '146542138', + 'ext': 'mp4', + 'title': 'Sound Rush [LIVE] | Defqon.1 Weekend Festival 2022 | Friday | RED', + 'display_id': 'sound-rush-live-v3-defqon-1-weekend-festival-2022-friday-red', + 'description': 'Relive Defqon.1 - Primal Energy 2022 with the sounds of Sound Rush LIVE at the RED on Friday! 
🔥', + 'season': 'Defqon.1 Weekend Festival 2022', + 'season_id': '31840632', + 'series': 'Defqon.1', + 'series_id': '31840378', + 'thumbnail': 'https://images.q-dance.network/1674829540-20220624171509-220624171509_delio_dn201093-2.jpg', + 'availability': 'premium_only', + 'duration': 1829, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'livestream', + 'url': 'https://www.q-dance.com/network/live/149170353', + 'info_dict': { + 'id': '149170353', + 'ext': 'mp4', + 'title': r're:^Defqon\.1 2023 - Friday - RED', + 'display_id': 'defqon-1-2023-friday-red', + 'description': 'md5:3c73fbbd4044e578e696adfc64019163', + 'season': 'Defqon.1 Weekend Festival 2023', + 'season_id': '141735599', + 'series': 'Defqon.1', + 'series_id': '31840378', + 'thumbnail': 'https://images.q-dance.network/1686849069-area-thumbs_red.png', + 'availability': 'subscriber_only', + 'live_status': 'is_live', + 'channel_id': 'qdancenetwork.video_149170353', + }, + 'skip': 'Completed livestream', + }, { + 'note': 'vod with alphanumeric id', + 'url': 'https://www.q-dance.com/network/library/WhDleSIWSfeT3Q9ObBKBeA', + 'info_dict': { + 'id': 'WhDleSIWSfeT3Q9ObBKBeA', + 'ext': 'mp4', + 'title': 'Aftershock I Defqon.1 Weekend Festival 2023 I Sunday I BLUE', + 'display_id': 'naam-i-defqon-1-weekend-festival-2023-i-dag-i-podium', + 'description': 'Relive Defqon.1 Path of the Warrior with Aftershock at the BLUE 🔥', + 'series': 'Defqon.1', + 'series_id': '31840378', + 'season': 'Defqon.1 Weekend Festival 2023', + 'season_id': '141735599', + 'duration': 3507, + 'availability': 'premium_only', + 'thumbnail': 'https://images.q-dance.network/1698158361-230625-135716-defqon-1-aftershock.jpg', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.q-dance.com/network/library/-uRFKXwmRZGVnve7av9uqA', + 'only_matching': True, + }] + + _access_token = None + _refresh_token = None + + def _call_login_api(self, data, note='Logging in'): + login = self._download_json( + 'https://members.id-t.com/api/auth/login', None, note, headers={ + 'content-type': 'application/json', + 'brand': 'qdance', + 'origin': 'https://www.q-dance.com', + 'referer': 'https://www.q-dance.com/', + }, data=json.dumps(data, separators=(',', ':')).encode(), + expected_status=lambda x: True) + + tokens = traverse_obj(login, ('data', { + '_id-t-accounts-token': ('accessToken', {str}), + '_id-t-accounts-refresh': ('refreshToken', {str}), + '_id-t-accounts-id-token': ('idToken', {str}), + })) + + if not tokens.get('_id-t-accounts-token'): + error = ': '.join(traverse_obj(login, ('error', ('code', 'message'), {str}))) + if 'validation_error' not in error: + raise ExtractorError(f'Q-Dance API said "{error}"') + msg = 'Invalid username or password' if 'email' in data else 'Refresh token has expired' + raise ExtractorError(msg, expected=True) + + for name, value in tokens.items(): + self._set_cookie('.q-dance.com', name, value) + + def _perform_login(self, username, password): + self._call_login_api({'email': username, 'password': password}) + + def _real_initialize(self): + cookies = self._get_cookies('https://www.q-dance.com/') + self._refresh_token = try_call(lambda: cookies['_id-t-accounts-refresh'].value) + self._access_token = try_call(lambda: cookies['_id-t-accounts-token'].value) + if not self._access_token: + self.raise_login_required() + + def _get_auth(self): + if (try_call(lambda: jwt_decode_hs256(self._access_token)['exp']) or 0) <= int(time.time() - 120): + if not self._refresh_token: + raise ExtractorError( + 'Cannot refresh access token, 
login with yt-dlp or refresh cookies in browser') + self._call_login_api({'refreshToken': self._refresh_token}, note='Refreshing access token') + self._real_initialize() + + return {'Authorization': self._access_token} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + data = self._search_nuxt_data(webpage, video_id, traverse=('data', 0, 'data')) + + def extract_availability(level): + level = int_or_none(level) or 0 + return self._availability( + needs_premium=(level >= 20), needs_subscription=(level >= 15), needs_auth=True) + + info = traverse_obj(data, { + 'title': ('title', {str.strip}), + 'description': ('description', {str.strip}), + 'display_id': ('slug', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + 'duration': ('durationInSeconds', {int_or_none}, {lambda x: x or None}), + 'availability': ('subscription', 'level', {extract_availability}), + 'is_live': ('type', {lambda x: x.lower() == 'live'}), + 'artist': ('acts', ..., {str}), + 'series': ('event', 'title', {str.strip}), + 'series_id': ('event', 'id', {str_or_none}), + 'season': ('eventEdition', 'title', {str.strip}), + 'season_id': ('eventEdition', 'id', {str_or_none}), + 'channel_id': ('pubnub', 'channelName', {str}), + }) + + stream = self._download_json( + f'https://dc9h6qmsoymbq.cloudfront.net/api/content/videos/{video_id}/url', + video_id, headers=self._get_auth(), expected_status=401) + + m3u8_url = traverse_obj(stream, ('data', 'url', {url_or_none})) + if not m3u8_url and traverse_obj(stream, ('error', 'code')) == 'unauthorized': + raise ExtractorError('Your account does not have access to this content', expected=True) + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, fatal=False, live=True) if m3u8_url else [] + if not formats: + self.raise_no_formats('No active streams found', expected=bool(info.get('is_live'))) + + return { + **info, + 'id': video_id, + 'formats': formats, + } diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py new file mode 100644 index 0000000..aa690d4 --- /dev/null +++ b/yt_dlp/extractor/qingting.py @@ -0,0 +1,47 @@ +from .common import InfoExtractor + +from ..utils import traverse_obj + + +class QingTingIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|m\.)?(?:qingting\.fm|qtfm\.cn)/v?channels/(?P<channel>\d+)/programs/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.qingting.fm/channels/378005/programs/22257411/', + 'md5': '47e6a94f4e621ed832c316fd1888fb3c', + 'info_dict': { + 'id': '22257411', + 'title': '用了十年才修改,谁在乎教科书?', + 'channel_id': '378005', + 'channel': '睡前消息', + 'uploader': '马督工', + 'ext': 'm4a', + } + }, { + 'url': 'https://m.qtfm.cn/vchannels/378005/programs/23023573/', + 'md5': '2703120b6abe63b5fa90b975a58f4c0e', + 'info_dict': { + 'id': '23023573', + 'title': '【睡前消息488】重庆山火之后,有图≠真相', + 'channel_id': '378005', + 'channel': '睡前消息', + 'uploader': '马督工', + 'ext': 'm4a', + } + }] + + def _real_extract(self, url): + channel_id, pid = self._match_valid_url(url).group('channel', 'id') + webpage = self._download_webpage( + f'https://m.qtfm.cn/vchannels/{channel_id}/programs/{pid}/', pid) + info = self._search_json(r'window\.__initStores\s*=', webpage, 'program info', pid) + return { + 'id': pid, + 'title': traverse_obj(info, ('ProgramStore', 'programInfo', 'title')), + 'channel_id': channel_id, + 'channel': traverse_obj(info, ('ProgramStore', 'channelInfo', 'title')), + 'uploader': traverse_obj(info, ('ProgramStore', 'podcasterInfo', 'podcaster', 'nickname')), + 'url': 
traverse_obj(info, ('ProgramStore', 'programInfo', 'audioUrl')), + 'vcodec': 'none', + 'acodec': 'm4a', + 'ext': 'm4a', + } diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py new file mode 100644 index 0000000..9285825 --- /dev/null +++ b/yt_dlp/extractor/qqmusic.py @@ -0,0 +1,365 @@ +import random +import re +import time + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, + strip_jsonp, + unescapeHTML, +) + + +class QQMusicIE(InfoExtractor): + IE_NAME = 'qqmusic' + IE_DESC = 'QQ音乐' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html' + _TESTS = [{ + 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', + 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8', + 'info_dict': { + 'id': '004295Et37taLD', + 'ext': 'mp3', + 'title': '可惜没如果', + 'release_date': '20141227', + 'creator': '林俊杰', + 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }, { + 'note': 'There is no mp3-320 version of this song.', + 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html', + 'md5': 'fa3926f0c585cda0af8fa4f796482e3e', + 'info_dict': { + 'id': '004MsGEo3DdNxV', + 'ext': 'mp3', + 'title': '如果', + 'release_date': '20050626', + 'creator': '李季美', + 'description': 'md5:46857d5ed62bc4ba84607a805dccf437', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }, { + 'note': 'lyrics not in .lrc format', + 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html', + 'info_dict': { + 'id': '001JyApY11tIp6', + 'ext': 'mp3', + 'title': 'Shadows Over Transylvania', + 'release_date': '19970225', + 'creator': 'Dark Funeral', + 'description': 'md5:c9b20210587cbcd6836a1c597bab4525', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + }] + + _FORMATS = { + 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320}, + 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128}, + 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10} + } + + # Reference: m_r_GetRUin() in top_player.js + # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js + @staticmethod + def m_r_get_ruin(): + curMs = int(time.time() * 1000) % 1000 + return int(round(random.random() * 2147483647) * curMs % 1E10) + + def _real_extract(self, url): + mid = self._match_id(url) + + detail_info_page = self._download_webpage( + 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, + mid, note='Download song detail info', + errnote='Unable to get song detail info', encoding='gbk') + + song_name = self._html_search_regex( + r"songname:\s*'([^']+)'", detail_info_page, 'song name') + + publish_time = self._html_search_regex( + r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page, + 'publish time', default=None) + if publish_time: + publish_time = publish_time.replace('-', '') + + singer = self._html_search_regex( + r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None) + + lrc_content = self._html_search_regex( + r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>', + detail_info_page, 'LRC lyrics', default=None) + if lrc_content: + lrc_content = lrc_content.replace('\\n', '\n') + + thumbnail_url = None + albummid = self._search_regex( + [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], + detail_info_page, 'album mid', default=None) + if albummid: + thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \ + % (albummid[-2:-1], albummid[-1], albummid) + + guid = self.m_r_get_ruin() + + vkey = 
self._download_json( + 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid, + mid, note='Retrieve vkey', errnote='Unable to get vkey', + transform_source=strip_jsonp)['key'] + + formats = [] + for format_id, details in self._FORMATS.items(): + formats.append({ + 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0' + % (details['prefix'], mid, details['ext'], vkey, guid), + 'format': format_id, + 'format_id': format_id, + 'quality': details['preference'], + 'abr': details.get('abr'), + }) + self._check_formats(formats, mid) + + actual_lrc_lyrics = ''.join( + line + '\n' for line in re.findall( + r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content or '')) + + info_dict = { + 'id': mid, + 'formats': formats, + 'title': song_name, + 'release_date': publish_time, + 'creator': singer, + 'description': lrc_content, + 'thumbnail': thumbnail_url + } + if actual_lrc_lyrics: + info_dict['subtitles'] = { + 'origin': [{ + 'ext': 'lrc', + 'data': actual_lrc_lyrics, + }] + } + return info_dict + + +class QQPlaylistBaseIE(InfoExtractor): + @staticmethod + def qq_static_url(category, mid): + return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) + + def get_singer_all_songs(self, singmid, num): + return self._download_webpage( + r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid, + query={ + 'format': 'json', + 'inCharset': 'utf8', + 'outCharset': 'utf-8', + 'platform': 'yqq', + 'needNewCode': 0, + 'singermid': singmid, + 'order': 'listen', + 'begin': 0, + 'num': num, + 'songstatus': 1, + }) + + def get_entries_from_page(self, singmid): + entries = [] + + default_num = 1 + json_text = self.get_singer_all_songs(singmid, default_num) + json_obj_all_songs = self._parse_json(json_text, singmid) + + if json_obj_all_songs['code'] == 0: + total = json_obj_all_songs['data']['total'] + json_text = self.get_singer_all_songs(singmid, total) + json_obj_all_songs = self._parse_json(json_text, singmid) + + for item in json_obj_all_songs['data']['list']: + if item['musicData'].get('songmid') is not None: + songmid = item['musicData']['songmid'] + entries.append(self.url_result( + r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) + + return entries + + +class QQMusicSingerIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:singer' + IE_DESC = 'QQ音乐 - 歌手' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html' + _TEST = { + 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html', + 'info_dict': { + 'id': '001BLpXF2DyJe2', + 'title': '林俊杰', + 'description': 'md5:870ec08f7d8547c29c93010899103751', + }, + 'playlist_mincount': 12, + } + + def _real_extract(self, url): + mid = self._match_id(url) + + entries = self.get_entries_from_page(mid) + singer_page = self._download_webpage(url, mid, 'Download singer page') + singer_name = self._html_search_regex( + r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None) + singer_desc = None + + if mid: + singer_desc_page = self._download_xml( + 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid, + 'Download singer description XML', + query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid}, + headers={'Referer': 'https://y.qq.com/n/yqq/singer/'}) + + singer_desc = singer_desc_page.find('./data/info/desc').text + + return self.playlist_result(entries, mid, singer_name, singer_desc) + + +class QQMusicAlbumIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:album' + IE_DESC = 'QQ音乐 - 专辑' +
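# Album pages resolve through the fcg_v8_album_info_cp.fcg JSON endpoint; each track in the result is delegated to QQMusicIE via url_result (see _real_extract below). +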
_VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html' + + _TESTS = [{ + 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html', + 'info_dict': { + 'id': '000gXCTb2AhRR1', + 'title': '我们都是这样长大的', + 'description': 'md5:179c5dce203a5931970d306aa9607ea6', + }, + 'playlist_count': 4, + }, { + 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html', + 'info_dict': { + 'id': '002Y5a3b3AlCu3', + 'title': '그리고...', + 'description': 'md5:a48823755615508a95080e81b51ba729', + }, + 'playlist_count': 8, + }] + + def _real_extract(self, url): + mid = self._match_id(url) + + album = self._download_json( + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid, + mid, 'Download album page')['data'] + + entries = [ + self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] + ) for song in album['list'] + ] + album_name = album.get('name') + album_detail = album.get('desc') + if album_detail is not None: + album_detail = album_detail.strip() + + return self.playlist_result(entries, mid, album_name, album_detail) + + +class QQMusicToplistIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:toplist' + IE_DESC = 'QQ音乐 - 排行榜' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html' + + _TESTS = [{ + 'url': 'https://y.qq.com/n/yqq/toplist/123.html', + 'info_dict': { + 'id': '123', + 'title': '美国iTunes榜', + 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08', + }, + 'playlist_count': 100, + }, { + 'url': 'https://y.qq.com/n/yqq/toplist/3.html', + 'info_dict': { + 'id': '3', + 'title': '巅峰榜·欧美', + 'description': 'md5:5a600d42c01696b26b71f8c4d43407da', + }, + 'playlist_count': 100, + }, { + 'url': 'https://y.qq.com/n/yqq/toplist/106.html', + 'info_dict': { + 'id': '106', + 'title': '韩国Mnet榜', + 'description': 'md5:cb84b325215e1d21708c615cac82a6e7', + }, + 'playlist_count': 50, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + + toplist_json = self._download_json( + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, + note='Download toplist page', + query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) + + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', + song['data']['songmid']) + for song in toplist_json['songlist']] + + topinfo = toplist_json.get('topinfo', {}) + list_name = topinfo.get('ListName') + list_description = topinfo.get('info') + return self.playlist_result(entries, list_id, list_name, list_description) + + +class QQMusicPlaylistIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:playlist' + IE_DESC = 'QQ音乐 - 歌单' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html' + + _TESTS = [{ + 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html', + 'info_dict': { + 'id': '3462654915', + 'title': '韩国5月新歌精选下旬', + 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', + }, + 'playlist_count': 40, + 'skip': 'playlist gone', + }, { + 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html', + 'info_dict': { + 'id': '1374105607', + 'title': '易入人心的华语民谣', + 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_json = self._download_json( + 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', + list_id, 'Download list page', + query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, + 
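# the fcg endpoint returns JSONP, so the callback wrapper is stripped before parsing +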
transform_source=strip_jsonp) + if not len(list_json.get('cdlist', [])): + if list_json.get('code'): + raise ExtractorError( + 'QQ Music said: error %d in fetching playlist info' % list_json['code'], + expected=True) + raise ExtractorError('Unable to get playlist info') + + cdlist = list_json['cdlist'][0] + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) + for song in cdlist['songlist']] + + list_name = cdlist.get('dissname') + list_description = clean_html(unescapeHTML(cdlist.get('desc'))) + return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/yt_dlp/extractor/r7.py b/yt_dlp/extractor/r7.py new file mode 100644 index 0000000..36f0b52 --- /dev/null +++ b/yt_dlp/extractor/r7.py @@ -0,0 +1,112 @@ +from .common import InfoExtractor +from ..utils import int_or_none + + +class R7IE(InfoExtractor): + _WORKING = False + _ENABLED = None # XXX: pass through to GenericIE + _VALID_URL = r'''(?x) + https?:// + (?: + (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| + noticias\.r7\.com(?:/[^/]+)+/[^/]+-| + player\.r7\.com/video/i/ + ) + (?P<id>[\da-f]{24}) + ''' + _TESTS = [{ + 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', + 'md5': '403c4e393617e8e8ddc748978ee8efde', + 'info_dict': { + 'id': '54e7050b0cf2ff57e0279389', + 'ext': 'mp4', + 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', + 'description': 'md5:01812008664be76a6479aa58ec865b72', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 98, + 'like_count': int, + 'view_count': int, + }, + }, { + 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', + 'only_matching': True, + }, { + 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', + 'only_matching': True, + }, { + 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://player-api.r7.com/video/i/%s' % video_id, video_id) + + title = video['title'] + + formats = [] + media_url_hls = video.get('media_url_hls') + if media_url_hls: + formats.extend(self._extract_m3u8_formats( + media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + media_url = video.get('media_url') + if media_url: + f = { + 'url': media_url, + 'format_id': 'http', + } + # m3u8 format always matches the http format, let's copy metadata from + # one to another + m3u8_formats = list(filter( + lambda f: f.get('vcodec') != 'none', formats)) + if len(m3u8_formats) == 1: + f_copy = m3u8_formats[0].copy() + f_copy.update(f) + f_copy['protocol'] = 'http' + f = f_copy + formats.append(f) + + description = video.get('description') + thumbnail = video.get('thumb') + duration = int_or_none(video.get('media_duration')) + like_count = 
int_or_none(video.get('likes')) + view_count = int_or_none(video.get('views')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'like_count': like_count, + 'view_count': view_count, + 'formats': formats, + } + + +class R7ArticleIE(InfoExtractor): + _WORKING = False + _ENABLED = None # XXX: pass through to GenericIE + _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)' + _TEST = { + 'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015', + 'only_matching': True, + } + + @classmethod + def suitable(cls, url): + return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})', + webpage, 'video id') + + return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key()) diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py new file mode 100644 index 0000000..f013582 --- /dev/null +++ b/yt_dlp/extractor/radiko.py @@ -0,0 +1,261 @@ +import base64 +import random +import re +import urllib.parse + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + time_seconds, + try_call, + unified_timestamp, + update_url_query, +) +from ..utils.traversal import traverse_obj + + +class RadikoBaseIE(InfoExtractor): + _GEO_BYPASS = False + _FULL_KEY = None + _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = ( + 'https://c-rpaa.smartstream.ne.jp', + 'https://si-c-radiko.smartstream.ne.jp', + 'https://tf-f-rpaa-radiko.smartstream.ne.jp', + 'https://tf-c-rpaa-radiko.smartstream.ne.jp', + 'https://si-f-radiko.smartstream.ne.jp', + 'https://rpaa.smartstream.ne.jp', + ) + _HOSTS_FOR_TIME_FREE_FFMPEG_SUPPORTED = ( + 'https://rd-wowza-radiko.radiko-cf.com', + 'https://radiko.jp', + 'https://f-radiko.smartstream.ne.jp', + ) + # Following URL forcibly connects not Time Free but Live + _HOSTS_FOR_LIVE = ( + 'https://c-radiko.smartstream.ne.jp', + ) + + def _negotiate_token(self): + _, auth1_handle = self._download_webpage_handle( + 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', + headers={ + 'x-radiko-app': 'pc_html5', + 'x-radiko-app-version': '0.0.1', + 'x-radiko-device': 'pc', + 'x-radiko-user': 'dummy_user', + }) + auth1_header = auth1_handle.headers + + auth_token = auth1_header['X-Radiko-AuthToken'] + kl = int(auth1_header['X-Radiko-KeyLength']) + ko = int(auth1_header['X-Radiko-KeyOffset']) + raw_partial_key = self._extract_full_key()[ko:ko + kl] + partial_key = base64.b64encode(raw_partial_key).decode() + + area_id = self._download_webpage( + 'https://radiko.jp/v2/api/auth2', None, 'Authenticating', + headers={ + 'x-radiko-device': 'pc', + 'x-radiko-user': 'dummy_user', + 'x-radiko-authtoken': auth_token, + 'x-radiko-partialkey': partial_key, + }).split(',')[0] + + if area_id == 'OUT': + self.raise_geo_restricted(countries=['JP']) + + auth_data = (auth_token, area_id) + self.cache.store('radiko', 'auth_data', auth_data) + return auth_data + + def _auth_client(self): + cachedata = self.cache.load('radiko', 'auth_data') + if cachedata is not None: + response = self._download_webpage( + 'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401, + 
headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]}) + if response == 'OK': + return cachedata + return self._negotiate_token() + + def _extract_full_key(self): + if self._FULL_KEY: + return self._FULL_KEY + + jscode = self._download_webpage( + 'https://radiko.jp/apps/js/playerCommon.js', None, + note='Downloading player js code') + full_key = self._search_regex( + (r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"), + jscode, 'full key', fatal=False, group='fullkey') + + if full_key: + full_key = full_key.encode() + else: # use only full key ever known + full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa' + + self._FULL_KEY = full_key + return full_key + + def _find_program(self, video_id, station, cursor): + station_program = self._download_xml( + 'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id, + note='Downloading radio program for %s station' % station) + + prog = None + for p in station_program.findall('.//prog'): + ft_str, to_str = p.attrib['ft'], p.attrib['to'] + ft = unified_timestamp(ft_str, False) + to = unified_timestamp(to_str, False) + if ft <= cursor and cursor < to: + prog = p + break + if not prog: + raise ExtractorError('Cannot identify radio program to download!') + assert ft, to + return prog, station_program, ft, ft_str, to_str + + def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query): + m3u8_playlist_data = self._download_xml( + f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id, + note='Downloading stream information') + + formats = [] + found = set() + + timefree_int = 0 if is_onair else 1 + + for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'): + pcu = element.text + if pcu in found: + continue + found.add(pcu) + playlist_url = update_url_query(pcu, { + 'station_id': station, + **query, + 'l': '15', + 'lsid': ''.join(random.choices('0123456789abcdef', k=32)), + 'type': 'b', + }) + + time_to_skip = None if is_onair else cursor - ft + + domain = urllib.parse.urlparse(playlist_url).netloc + subformats = self._extract_m3u8_formats( + playlist_url, video_id, ext='m4a', + live=True, fatal=False, m3u8_id=domain, + note=f'Downloading m3u8 information from {domain}', + headers={ + 'X-Radiko-AreaId': area_id, + 'X-Radiko-AuthToken': auth_token, + }) + for sf in subformats: + if (is_onair ^ pcu.startswith(self._HOSTS_FOR_LIVE)) or ( + not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)): + sf['preference'] = -100 + sf['format_note'] = 'not preferred' + if not is_onair and timefree_int == 1 and time_to_skip: + sf['downloader_options'] = {'ffmpeg_args': ['-ss', str(time_to_skip)]} + formats.extend(subformats) + + return formats + + def _extract_performers(self, prog): + return traverse_obj(prog, ( + 'pfm/text()', ..., {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip})) or None + + +class RadikoIE(RadikoBaseIE): + _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' + + _TESTS = [{ + # QRR (文化放送) station provides <desc> + 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300', + 'only_matching': True, + }, { + # FMT (TOKYO FM) station does not provide <desc> + 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000', + 'only_matching': True, + }, { + 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000', + 'only_matching': True, + }] + + def _real_extract(self, url): + station, video_id = 
self._match_valid_url(url).groups() + vid_int = unified_timestamp(video_id, False) + prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) + + auth_token, area_id = self._auth_client() + + return { + 'id': video_id, + 'title': try_call(lambda: prog.find('title').text), + 'cast': self._extract_performers(prog), + 'description': clean_html(try_call(lambda: prog.find('info').text)), + 'uploader': try_call(lambda: station_program.find('.//name').text), + 'uploader_id': station, + 'timestamp': vid_int, + 'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)), + 'is_live': True, + 'formats': self._extract_formats( + video_id=video_id, station=station, is_onair=False, + ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, + query={ + 'start_at': radio_begin, + 'ft': radio_begin, + 'end_at': radio_end, + 'to': radio_end, + 'seek': video_id + } + ), + } + + +class RadikoRadioIE(RadikoBaseIE): + _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)' + + _TESTS = [{ + # QRR (文化放送) station provides <desc> + 'url': 'https://radiko.jp/#!/live/QRR', + 'only_matching': True, + }, { + # FMT (TOKYO FM) station does not provide <desc> + 'url': 'https://radiko.jp/#!/live/FMT', + 'only_matching': True, + }, { + 'url': 'https://radiko.jp/#!/live/JOAK-FM', + 'only_matching': True, + }] + + def _real_extract(self, url): + station = self._match_id(url) + self.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop') + + auth_token, area_id = self._auth_client() + # get current time in JST (GMT+9:00 w/o DST) + vid_now = time_seconds(hours=9) + + prog, station_program, ft, _, _ = self._find_program(station, station, vid_now) + + title = prog.find('title').text + description = clean_html(prog.find('info').text) + station_name = station_program.find('.//name').text + + formats = self._extract_formats( + video_id=station, station=station, is_onair=True, + ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id, + query={}) + + return { + 'id': station, + 'title': title, + 'cast': self._extract_performers(prog), + 'description': description, + 'uploader': station_name, + 'uploader_id': station, + 'timestamp': ft, + 'formats': formats, + 'is_live': True, + } diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py new file mode 100644 index 0000000..1a5a635 --- /dev/null +++ b/yt_dlp/extractor/radiocanada.py @@ -0,0 +1,165 @@ +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + unified_strdate, +) + + +class RadioCanadaIE(InfoExtractor): + IE_NAME = 'radiocanada' + _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' + _TESTS = [ + { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'mp4', + 'title': 'Le parcours du tireur capté sur vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, + { + # empty Title + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', + 'info_dict': { + 'id': '7754998', + 'ext': 'mp4', + 'title': 'letelejournal22h', + 'description': 'INTEGRALE WEB 22H-TJ', + 'upload_date': 
'20170720', + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + # with protectionType but not actually DRM protected + 'url': 'radiocanada:toutv:140872', + 'info_dict': { + 'id': '140872', + 'title': 'Épisode 1', + 'series': 'District 31', + }, + 'only_matching': True, + } + ] + _GEO_COUNTRIES = ['CA'] + _access_token = None + _claims = None + + def _call_api(self, path, video_id=None, app_code=None, query=None): + if not query: + query = {} + query.update({ + 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', + 'output': 'json', + }) + if video_id: + query.update({ + 'appCode': app_code, + 'idMedia': video_id, + }) + if self._access_token: + query['access_token'] = self._access_token + try: + return self._download_json( + 'https://services.radio-canada.ca/media/' + path, video_id, query=query) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status in (401, 422): + data = self._parse_json(e.cause.response.read().decode(), None) + error = data.get('error_description') or data['errorMessage']['text'] + raise ExtractorError(error, expected=True) + raise + + def _extract_info(self, app_code, video_id): + metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas'] + + def get_meta(name): + for meta in metas: + if meta.get('name') == name: + text = meta.get('text') + if text: + return text + + # protectionType does not necessarily mean the video is DRM protected (see + # https://github.com/ytdl-org/youtube-dl/pull/18609). + if get_meta('protectionType'): + self.report_warning('This video is probably DRM protected.') + + query = { + 'connectionType': 'hd', + 'deviceType': 'ipad', + 'multibitrate': 'true', + } + if self._claims: + query['claims'] = self._claims + v_data = self._call_api('validation/v2/', video_id, app_code, query) + v_url = v_data.get('url') + if not v_url: + error = v_data['message'] + if error == "Le contenu sélectionné n'est pas disponible dans votre pays": + raise self.raise_geo_restricted(error, self._GEO_COUNTRIES) + if error == 'Le contenu sélectionné est disponible seulement en premium': + self.raise_login_required(error) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) + formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') + + subtitles = {} + closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5') + if closed_caption_url: + subtitles['fr'] = [{ + 'url': closed_caption_url, + 'ext': determine_ext(closed_caption_url, 'vtt'), + }] + + return { + 'id': video_id, + 'title': get_meta('Title') or get_meta('AV-nomEmission'), + 'description': get_meta('Description') or get_meta('ShortDescription'), + 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), + 'duration': int_or_none(get_meta('length')), + 'series': get_meta('Emission'), + 'season_number': int_or_none(get_meta('SrcSaison')), + 'episode_number': int_or_none(get_meta('SrcEpisode')), + 'upload_date': unified_strdate(get_meta('Date')), + 'subtitles': subtitles, + 'formats': formats, + } + + def _real_extract(self, url): + return self._extract_info(*self._match_valid_url(url).groups()) + + +class RadioCanadaAudioVideoIE(InfoExtractor): + IE_NAME = 'radiocanada:audiovideo' + _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', + 'info_dict': { + 'id': '7527184', + 'ext': 'mp4', + 'title': 'Barack Obama au Vietnam', + 'description': 'Les États-Unis lèvent
l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', + 'upload_date': '20160523', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam', + 'only_matching': True, + }] + + def _real_extract(self, url): + return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py new file mode 100644 index 0000000..38f8cf7 --- /dev/null +++ b/yt_dlp/extractor/radiocomercial.py @@ -0,0 +1,154 @@ +import itertools + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + extract_attributes, + get_element_by_class, + get_element_html_by_class, + get_element_text_and_html_by_tag, + get_elements_html_by_class, + int_or_none, + join_nonempty, + try_call, + unified_strdate, + update_url, + urljoin +) +from ..utils.traversal import traverse_obj + + +class RadioComercialIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper', + 'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4', + 'info_dict': { + 'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas', + 'ext': 'mp3', + 'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.', + 'release_date': '20231025', + 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', + 'season': 'Season 6', + 'season_number': 6, + } + }, { + 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem', + 'md5': '47e96c273aef96a8eb160cd6cf46d782', + 'info_dict': { + 'id': 'convenca-me-num-minuto-que-os-lobisomens-existem', + 'ext': 'mp3', + 'title': 'Convença-me num minuto que os lobisomens existem', + 'release_date': '20231026', + 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', + 'season': 'Season 3', + 'season_number': 3, + } + }, { + 'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao', + 'md5': '69be64255420fec23b7259955d771e54', + 'info_dict': { + 'id': 'o-desastre-de-aviao', + 'ext': 'mp3', + 'title': 'O desastre de avião', + 'description': 'md5:8a82beeb372641614772baab7246245f', + 'release_date': '20231101', + 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', + 'season': 'Season 2', + 'season_number': 2, + }, + 'params': { + # inconsistent md5 + 'skip_download': True, + }, + }, { + 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro', + 'md5': '91d32d4d4b1407272068b102730fc9fa', + 'info_dict': { + 'id': 't-n-t-29-de-outubro', + 'ext': 'mp3', + 'title': 'T.N.T 29 de outubro', + 'release_date': '20231029', + 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', + 'season': 'Season 2023', + 'season_number': 2023, + } + }] + + def _real_extract(self, url): + video_id, season = self._match_valid_url(url).group('id', 'season') + webpage = self._download_webpage(url, video_id) + return { + 'id': video_id, + 'title': self._html_extract_title(webpage), + 'description': self._og_search_description(webpage, default=None), + 'release_date': unified_strdate(get_element_by_class( + 'date', get_element_html_by_class('descriptions', webpage) or '')), +
'thumbnail': self._og_search_thumbnail(webpage), + 'season_number': int_or_none(season), + 'url': extract_attributes(get_element_html_by_class('audiofile', webpage) or '').get('href'), + } + + +class RadioComercialPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3', + 'info_dict': { + 'id': 'convenca-me-num-minuto_t3', + 'title': 'Convença-me num Minuto - Temporada 3', + }, + 'playlist_mincount': 32 + }, { + 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao', + 'info_dict': { + 'id': 'o-homem-que-mordeu-o-cao', + 'title': 'O Homem Que Mordeu o Cão', + }, + 'playlist_mincount': 19 + }, { + 'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas', + 'info_dict': { + 'id': 'as-minhas-coisas-favoritas', + 'title': 'As Minhas Coisas Favoritas', + }, + 'playlist_mincount': 131 + }, { + 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023', + 'info_dict': { + 'id': 'tnt-todos-no-top_t2023', + 'title': 'TNT - Todos No Top - Temporada 2023', + }, + 'playlist_mincount': 39 + }] + + def _entries(self, url, playlist_id): + for page in itertools.count(1): + try: + webpage = self._download_webpage( + f'{url}/{page}', playlist_id, f'Downloading page {page}') + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 404: + break + raise + + episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage) + if not episodes: + break + for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')): + episode_url = urljoin(url, url_path) + if RadioComercialIE.suitable(episode_url): + yield episode_url + + def _real_extract(self, url): + podcast, season = self._match_valid_url(url).group('id', 'season') + playlist_id = join_nonempty(podcast, season, delim='_t') + url = update_url(url, query=None, fragment=None) + webpage = self._download_webpage(url, playlist_id) + + name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0]) + title = name if name == season else join_nonempty(name, season, delim=' - Temporada ') + + return self.playlist_from_matches( + self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE) diff --git a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py new file mode 100644 index 0000000..7262078 --- /dev/null +++ b/yt_dlp/extractor/radiode.py @@ -0,0 +1,50 @@ +from .common import InfoExtractor + + +class RadioDeIE(InfoExtractor): + _WORKING = False + IE_NAME = 'radio.de' + _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' + _TEST = { + 'url': 'http://ndr2.radio.de/', + 'info_dict': { + 'id': 'ndr2', + 'ext': 'mp3', + 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:591c49c702db1a33751625ebfb67f273', + 'thumbnail': r're:^https?://.*\.png', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + radio_id = self._match_id(url) + webpage = self._download_webpage(url, radio_id) + jscode = self._search_regex( + r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", + webpage, 'broadcast') + + broadcast = self._parse_json(jscode, radio_id) + title = broadcast['name'] + description = broadcast.get('description') or broadcast.get('shortDescription') + thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or 
broadcast.get('logo100x100') + + formats = [{ + 'url': stream['streamUrl'], + 'ext': stream['streamContentFormat'].lower(), + 'acodec': stream['streamContentFormat'], + 'abr': stream['bitRate'], + 'asr': stream['sampleRate'] + } for stream in broadcast['streamUrls']] + + return { + 'id': radio_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'is_live': True, + 'formats': formats, + } diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py new file mode 100644 index 0000000..6bd6fe9 --- /dev/null +++ b/yt_dlp/extractor/radiofrance.py @@ -0,0 +1,473 @@ +import itertools +import re +import urllib.parse + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + join_nonempty, + js_to_json, + parse_duration, + strftime_or_none, + traverse_obj, + unified_strdate, + urljoin, +) + + +class RadioFranceIE(InfoExtractor): + _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)' + IE_NAME = 'radiofrance' + + _TEST = { + 'url': 'http://maison.radiofrance.fr/radiovisions/one-one', + 'md5': 'bdbb28ace95ed0e04faab32ba3160daf', + 'info_dict': { + 'id': 'one-one', + 'ext': 'ogg', + 'title': 'One to one', + 'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", + 'uploader': 'Thomas Hercouët', + }, + } + + def _real_extract(self, url): + m = self._match_valid_url(url) + video_id = m.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') + description = self._html_search_regex( + r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>', + webpage, 'description', fatal=False) + uploader = self._html_search_regex( + r'<div class="credit">  © (.*?)</div>', + webpage, 'uploader', fatal=False) + + formats_str = self._html_search_regex( + r'class="jp-jplayer[^"]*" data-source="([^"]+)">', + webpage, 'audio URLs') + formats = [ + { + 'format_id': fm[0], + 'url': fm[1], + 'vcodec': 'none', + 'quality': i, + } + for i, fm in + enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)) + ] + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + } + + +class RadioFranceBaseIE(InfoExtractor): + _VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr' + + _STATIONS_RE = '|'.join(map(re.escape, ( + 'franceculture', + 'franceinfo', + 'franceinter', + 'francemusique', + 'fip', + 'mouv', + ))) + + def _extract_data_from_webpage(self, webpage, display_id, key): + return traverse_obj(self._search_json( + r'\bconst\s+data\s*=', webpage, key, display_id, + contains_pattern=r'\[\{(?s:.+)\}\]', transform_source=js_to_json), + (..., 'data', key, {dict}), get_all=False) or {} + + +class FranceCultureIE(RadioFranceBaseIE): + _VALID_URL = rf'''(?x) + {RadioFranceBaseIE._VALID_URL_BASE} + /(?:{RadioFranceBaseIE._STATIONS_RE}) + /podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#]) + ''' + + _TESTS = [ + { + 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487', + 'info_dict': { + 'id': '8440487', + 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau', + 'ext': 'mp3', + 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau 
?', + 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'upload_date': '20220514', + 'duration': 2750, + }, + }, + { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675', + 'info_dict': { + 'id': '2107675', + 'display_id': 'le-7-9-30-du-vendredi-10-mars-2023', + 'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot', + 'description': 'md5:36ee74351ede77a314fdebb94026b916', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'upload_date': '20230310', + 'duration': 8977, + 'ext': 'mp3', + }, + }, + { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507', + 'only_matching': True, + }, { + 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + webpage = self._download_webpage(url, display_id) + + # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846 + video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}') + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_data['contentUrl'], + 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None, + 'duration': parse_duration(video_data.get('duration')), + 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>', + webpage, 'title', default=self._og_search_title(webpage)), + 'description': self._html_search_regex( + r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None), + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader': self._html_search_regex( + r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None), + 'upload_date': unified_strdate(self._search_regex( + r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)) + } + + +class RadioFranceLiveIE(RadioFranceBaseIE): + _VALID_URL = rf'''(?x) + https?://(?:www\.)?radiofrance\.fr + /(?P<id>{RadioFranceBaseIE._STATIONS_RE}) + /?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$) + ''' + + _TESTS = [{ + 'url': 'https://www.radiofrance.fr/franceinter/', + 'info_dict': { + 'id': 'franceinter', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/franceculture', + 'info_dict': { + 'id': 'franceculture', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family', + 'info_dict': { + 'id': 'mouv-radio-musique-kids-family', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul', + 'info_dict': { + 'id': 'mouv-radio-rnb-soul', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix', + 'info_dict': { + 'id': 'mouv-radio-musique-mix', + 
'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/fip/radio-rock', + 'info_dict': { + 'id': 'fip-radio-rock', + 'title': str, + 'live_status': 'is_live', + 'ext': 'aac', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.radiofrance.fr/mouv', + 'only_matching': True, + }] + + def _real_extract(self, url): + station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id') + + if substation_id: + webpage = self._download_webpage(url, station_id) + api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData') + else: + api_response = self._download_json( + f'https://www.radiofrance.fr/{station_id}/api/live', station_id) + + formats, subtitles = [], {} + for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])): + if media_source.get('format') == 'hls': + fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': media_source['url'], + 'abr': media_source.get('bitrate'), + }) + + return { + 'id': join_nonempty(station_id, substation_id), + 'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty( + ('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + } + + +class RadioFrancePlaylistBaseIE(RadioFranceBaseIE): + """Subclasses must set _METADATA_KEY""" + + def _call_api(self, content_id, cursor, page_num): + raise NotImplementedError('This method must be implemented by subclasses') + + def _generate_playlist_entries(self, content_id, content_response): + for page_num in itertools.count(2): + for entry in content_response['items']: + yield self.url_result( + f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, { + 'title': 'title', + 'description': 'standFirst', + 'timestamp': ('publishedDate', {int_or_none}), + 'thumbnail': ('visual', 'src'), + })) + + next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False) + if not next_cursor: + break + + content_response = self._call_api(content_id, next_cursor, page_num) + + def _real_extract(self, url): + display_id = self._match_id(url) + + metadata = self._download_json( + 'https://www.radiofrance.fr/api/v2.1/path', display_id, + query={'value': urllib.parse.urlparse(url).path})['content'] + + content_id = metadata['id'] + + return self.playlist_result( + self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id, + display_id=display_id, **{**traverse_obj(metadata, { + 'title': 'title', + 'description': 'standFirst', + 'thumbnail': ('visual', 'src'), + }), **traverse_obj(metadata, { + 'title': 'name', + 'description': 'role', + })}) + + +class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE): + _VALID_URL = rf'''(?x) + {RadioFranceBaseIE._VALID_URL_BASE} + /(?:{RadioFranceBaseIE._STATIONS_RE}) + /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$) + ''' + + _TESTS = [{ + 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert', + 'info_dict': { + 'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17', + 'display_id': 'le-billet-vert', + 'title': 'Le billet sciences', + 'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1', + 'thumbnail': 
r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 11, + }, { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale', + 'info_dict': { + 'id': '566fd524-3074-4fbc-ac69-8696f2152a54', + 'display_id': 'jean-marie-le-pen-l-obsession-nationale', + 'title': 'Jean-Marie Le Pen, l\'obsession nationale', + 'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_count': 7, + }, { + 'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine', + 'info_dict': { + 'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d', + 'display_id': 'serie-thomas-grjebine', + 'title': 'Thomas Grjebine', + }, + 'playlist_count': 1, + }, { + 'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip', + 'info_dict': { + 'id': '143dff38-e956-4a5d-8576-1c0b7242b99e', + 'display_id': 'certains-l-aiment-fip', + 'title': 'Certains l’aiment Fip', + 'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 321, + }, { + 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9', + 'only_matching': True, + }, { + 'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix', + 'only_matching': True, + }] + + _METADATA_KEY = 'expressions' + + def _call_api(self, podcast_id, cursor, page_num): + return self._download_json( + f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id, + note=f'Downloading page {page_num}', query={'pageCursor': cursor}) + + +class RadioFranceProfileIE(RadioFrancePlaylistBaseIE): + _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)' + + _TESTS = [{ + 'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3', + 'info_dict': { + 'id': '86c62790-e481-11e2-9f7b-782bcb6744eb', + 'display_id': 'thomas-pesquet', + 'title': 'Thomas Pesquet', + 'description': 'Astronaute à l\'agence spatiale européenne', + }, + 'playlist_mincount': 212, + }, { + 'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie', + 'info_dict': { + 'id': '9593050b-0183-4972-a0b5-d8f699079e02', + 'display_id': 'eugenie-bastie', + 'title': 'Eugénie Bastié', + 'description': 'Journaliste et essayiste', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 39, + }, { + 'url': 'https://www.radiofrance.fr/personnes/lea-salame', + 'only_matching': True, + }] + + _METADATA_KEY = 'documents' + + def _call_api(self, profile_id, cursor, page_num): + resp = self._download_json( + f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id, + note=f'Downloading page {page_num}', query={ + 'relation': 'personality', + 'cursor': cursor, + }) + + resp['next'] = traverse_obj(resp, ('pagination', 'next')) + return resp + + +class RadioFranceProgramScheduleIE(RadioFranceBaseIE): + _VALID_URL = rf'''(?x) + {RadioFranceBaseIE._VALID_URL_BASE} + /(?P<station>{RadioFranceBaseIE._STATIONS_RE}) + /grille-programmes(?:\?date=(?P<date>[\d-]+))? 
+ ''' + + _TESTS = [{ + 'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023', + 'info_dict': { + 'id': 'franceinter-program-20230217', + 'upload_date': '20230217', + }, + 'playlist_count': 25, + }, { + 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023', + 'info_dict': { + 'id': 'franceculture-program-20230201', + 'upload_date': '20230201', + }, + 'playlist_count': 25, + }, { + 'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023', + 'info_dict': { + 'id': 'mouv-program-20230319', + 'upload_date': '20230319', + }, + 'playlist_count': 3, + }, { + 'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023', + 'info_dict': { + 'id': 'francemusique-program-20230318', + 'upload_date': '20230318', + }, + 'playlist_count': 15, + }, { + 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes', + 'only_matching': True, + }] + + def _generate_playlist_entries(self, webpage_url, api_response): + for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])): + yield self.url_result( + urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE, + url_transparent=True, **traverse_obj(entry, { + 'title': ('expression', 'title'), + 'thumbnail': ('expression', 'visual', 'src'), + 'timestamp': ('startTime', {int_or_none}), + 'series_id': ('concept', 'id'), + 'series': ('concept', 'title'), + })) + + def _real_extract(self, url): + station, date = self._match_valid_url(url).group('station', 'date') + webpage = self._download_webpage(url, station) + grid_data = self._extract_data_from_webpage(webpage, station, 'grid') + upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d') + + return self.playlist_result( + self._generate_playlist_entries(url, grid_data), + join_nonempty(station, 'program', upload_date), upload_date=upload_date) diff --git a/yt_dlp/extractor/radiojavan.py b/yt_dlp/extractor/radiojavan.py new file mode 100644 index 0000000..b3befae --- /dev/null +++ b/yt_dlp/extractor/radiojavan.py @@ -0,0 +1,81 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + parse_resolution, + str_to_int, + unified_strdate, + urlencode_postdata, + urljoin, +) + + +class RadioJavanIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' 
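+    # A sketch of how this extractor resolves media, summarising the code
+    # below rather than documenting the site authoritatively: the page
+    # defines JavaScript variables of the shape
+    #     RJ.video480p = 'media/music_video/480/chaartaar-ashoobam.mp4'
+    # (hypothetical example value), and a POST to /videos/video_host returns
+    # the CDN host that such relative paths are joined against.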
+ _TEST = { + 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', + 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', + 'info_dict': { + 'id': 'chaartaar-ashoobam', + 'ext': 'mp4', + 'title': 'Chaartaar - Ashoobam', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'upload_date': '20150215', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + download_host = self._download_json( + 'https://www.radiojavan.com/videos/video_host', video_id, + data=urlencode_postdata({'id': video_id}), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': url, + }).get('host', 'https://host1.rjmusicmedia.com') + + webpage = self._download_webpage(url, video_id) + + formats = [] + for format_id, _, video_path in re.findall( + r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', + webpage): + f = parse_resolution(format_id) + f.update({ + 'url': urljoin(download_host, video_path), + 'format_id': format_id, + }) + formats.append(f) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + upload_date = unified_strdate(self._search_regex( + r'class="date_added">Date added: ([^<]+)<', + webpage, 'upload date', fatal=False)) + + view_count = str_to_int(self._search_regex( + r'class="views">Plays: ([\d,]+)', + webpage, 'view count', fatal=False)) + like_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) likes', + webpage, 'like count', fatal=False)) + dislike_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) dislikes', + webpage, 'dislike count', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'formats': formats, + } diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py new file mode 100644 index 0000000..8f9737a --- /dev/null +++ b/yt_dlp/extractor/radiokapital.py @@ -0,0 +1,97 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + traverse_obj, + unescapeHTML, +) + +import itertools +from urllib.parse import urlencode + + +class RadioKapitalBaseIE(InfoExtractor): + def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): + return self._download_json( + f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}', + video_id, note=note) + + def _parse_episode(self, data): + release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3]) + return { + '_type': 'url_transparent', + 'url': data['mixcloud_url'], + 'ie_key': 'Mixcloud', + 'title': unescapeHTML(data['title']), + 'description': clean_html(data.get('content')), + 'tags': traverse_obj(data, ('tags', ..., 'name')), + 'release_date': release, + 'series': traverse_obj(data, ('show', 'title')), + } + + +class RadioKapitalIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial', + 'info_dict': { + 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20', + 'ext': 'm4a', + 'title': '#5: It’s okay to\xa0be\xa0immaterial', + 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4', + 'uploader': 'Radio Kapitał', + 'uploader_id': 'radiokapital', + 'timestamp': 1621640164, 
+ 'upload_date': '20210521', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + episode = self._call_api('episodes/%s' % video_id, video_id) + return self._parse_episode(episode) + + +class RadioKapitalShowIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital:show' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/wesz', + 'info_dict': { + 'id': '100', + 'title': 'WĘSZ', + 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c', + }, + 'playlist_mincount': 17, + }] + + def _get_episode_list(self, series_id, page_no): + return self._call_api( + 'episodes', series_id, + f'Downloading episode list page #{page_no}', qs={ + 'show': series_id, + 'page': page_no, + }) + + def _entries(self, series_id): + for page_no in itertools.count(1): + episode_list = self._get_episode_list(series_id, page_no) + yield from (self._parse_episode(ep) for ep in episode_list['items']) + if episode_list['next'] is None: + break + + def _real_extract(self, url): + series_id = self._match_id(url) + + show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata') + entries = self._entries(series_id) + return { + '_type': 'playlist', + 'entries': entries, + 'id': str(show['id']), + 'title': show.get('title'), + 'description': clean_html(show.get('content')), + } diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py new file mode 100644 index 0000000..6752017 --- /dev/null +++ b/yt_dlp/extractor/radiozet.py @@ -0,0 +1,50 @@ +from .common import InfoExtractor +from ..utils import ( + traverse_obj, + strip_or_none, +) + + +class RadioZetPodcastIE(InfoExtractor): + _VALID_URL = r'https?://player\.radiozet\.pl\/Podcasty/.*?/(?P<id>.+)' + _TEST = { + 'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'md5': 'e03665c316b4fbc5f6a8f232948bbba3', + 'info_dict': { + 'id': '42154', + 'display_id': 'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'title': 'O przedmiotach szkolnych, które przydają się w życiu', + 'description': 'md5:fa72bed49da334b09e5b2f79851f185c', + 'release_timestamp': 1592985480, + 'ext': 'mp3', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 83, + 'series': 'Nie Ma Za Co', + 'creator': 'Katarzyna Pakosińska', + } + } + + def _call_api(self, podcast_id, display_id): + return self._download_json( + f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet', + display_id) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]', + webpage, 'podcast id') + data = self._call_api(podcast_id, display_id)['data'][0] + + return { + 'id': podcast_id, + 'display_id': display_id, + 'title': strip_or_none(data.get('title')), + 'description': strip_or_none(traverse_obj(data, ('program', 'desc'))), + 'release_timestamp': data.get('published_date'), + 'url': traverse_obj(data, ('player', 'stream')), + 'thumbnail': traverse_obj(data, ('program', 'image', 'original')), + 'duration': traverse_obj(data, ('player', 'duration')), + 'series': strip_or_none(traverse_obj(data, ('program', 'title'))), + 'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))), + } diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py new file mode 100644 index 0000000..3c00183 --- /dev/null +++ 
b/yt_dlp/extractor/radlive.py @@ -0,0 +1,180 @@ +import json + +from ..utils import ( + ExtractorError, + format_field, + traverse_obj, + try_get, + unified_timestamp +) +from .common import InfoExtractor + + +class RadLiveIE(InfoExtractor): + IE_NAME = 'radlive' + _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)' + _TESTS = [{ + 'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a', + 'md5': '6219d5d31d52de87d21c9cf5b7cb27ff', + 'info_dict': { + 'id': 'dc5acfbc-761b-4bec-9564-df999905116a', + 'ext': 'mp4', + 'title': 'Deathpact - Digital Mirage 2 [Full Set]', + 'language': 'en', + 'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png', + 'description': '', + 'release_timestamp': 1600185600.0, + 'channel': 'Proximity', + 'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009', + 'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009', + } + }, { + 'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', + 'md5': '40b2175f347592125d93e9a344080125', + 'info_dict': { + 'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', + 'ext': 'mp4', + 'title': 'E01: Bad Jokes 1', + 'language': 'en', + 'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg', + 'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype', + 'episode': 'E01: Bad Jokes 1', + 'episode_number': 1, + 'episode_id': '336', + }, + }] + + def _real_extract(self, url): + content_type, video_id = self._match_valid_url(url).groups() + + webpage = self._download_webpage(url, video_id) + + content_info = json.loads(self._search_regex( + r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', + webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] + video_info = content_info[content_type] + + if not video_info: + raise ExtractorError('Unable to extract video info, make sure the URL is valid') + + formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id) + + data = video_info.get('structured_data', {}) + + release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate'))) + channel = next(iter(content_info.get('channels', [])), {}) + channel_id = channel.get('lrn', '').split(':')[-1] or None + + result = { + 'id': video_id, + 'title': video_info['title'], + 'formats': formats, + 'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')), + 'thumbnail': traverse_obj(data, ('image', 'contentUrl')), + 'description': data.get('description'), + 'release_timestamp': release_date, + 'channel': channel.get('name'), + 'channel_id': channel_id, + 'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'), + + } + if content_type == 'episode': + result.update({ + # TODO: Get season number when downloading single episode + 'episode': video_info.get('title'), + 'episode_number': video_info.get('number'), + 'episode_id': video_info.get('id'), + }) + + return result + + +class RadLiveSeasonIE(RadLiveIE): # XXX: Do not subclass from concrete IE + IE_NAME = 'radlive:season' + _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)' + _TESTS = [{ + 'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75', + 'md5': '40b2175f347592125d93e9a344080125', + 'info_dict': { + 'id': '08a290f7-c9ef-4e22-9105-c255995a2e75', + 'title': 'Bad Jokes - Season 1', + }, + 
'playlist_mincount': 5, + }] + + @classmethod + def suitable(cls, url): + return False if RadLiveIE.suitable(url) else super(RadLiveSeasonIE, cls).suitable(url) + + def _real_extract(self, url): + season_id = self._match_id(url) + webpage = self._download_webpage(url, season_id) + + content_info = json.loads(self._search_regex( + r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', + webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] + video_info = content_info['season'] + + entries = [{ + '_type': 'url_transparent', + 'id': episode['structured_data']['url'].split('/')[-1], + 'url': episode['structured_data']['url'], + 'series': try_get(content_info, lambda x: x['series']['title']), + 'season': video_info['title'], + 'season_number': video_info.get('number'), + 'season_id': video_info.get('id'), + 'ie_key': RadLiveIE.ie_key(), + } for episode in video_info['episodes']] + + return self.playlist_result(entries, season_id, video_info.get('title')) + + +class RadLiveChannelIE(RadLiveIE): # XXX: Do not subclass from concrete IE + IE_NAME = 'radlive:channel' + _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)' + _TESTS = [{ + 'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274', + 'md5': '625156a08b7f2b0b849f234e664457ac', + 'info_dict': { + 'id': '5c4d8df4-6fa0-413c-81e3-873479b49274', + 'title': 'Whistle Sports', + }, + 'playlist_mincount': 7, + }] + + _QUERY = ''' +query WebChannelListing ($lrn: ID!) { + channel (id:$lrn) { + name + features { + structured_data + } + } +}''' + + @classmethod + def suitable(cls, url): + return False if RadLiveIE.suitable(url) else super(RadLiveChannelIE, cls).suitable(url) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + graphql = self._download_json( + 'https://content.mhq.12core.net/graphql', channel_id, + headers={'Content-Type': 'application/json'}, + data=json.dumps({ + 'query': self._QUERY, + 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'} + }).encode('utf-8')) + + data = traverse_obj(graphql, ('data', 'channel')) + if not data: + raise ExtractorError('Unable to extract video info, make sure the URL is valid') + + entries = [{ + '_type': 'url_transparent', + 'url': feature['structured_data']['url'], + 'ie_key': RadLiveIE.ie_key(), + } for feature in data['features']] + + return self.playlist_result(entries, channel_id, data.get('name')) diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py new file mode 100644 index 0000000..c1fc65c --- /dev/null +++ b/yt_dlp/extractor/rai.py @@ -0,0 +1,816 @@ +import re + +from .common import InfoExtractor +from ..networking import HEADRequest +from ..utils import ( + clean_html, + determine_ext, + ExtractorError, + filter_dict, + GeoRestrictedError, + int_or_none, + join_nonempty, + parse_duration, + remove_start, + strip_or_none, + traverse_obj, + try_get, + unified_strdate, + unified_timestamp, + update_url_query, + urljoin, + xpath_text, +) + + +class RaiBaseIE(InfoExtractor): + _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' + _GEO_COUNTRIES = ['IT'] + _GEO_BYPASS = False + + def _fix_m3u8_formats(self, media_url, video_id): + fmts = self._extract_m3u8_formats( + media_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + + # Fix malformed m3u8 manifests by setting audio-only/video-only formats + for f in fmts: + if not f.get('acodec'): + f['acodec'] = 'mp4a' + if not f.get('vcodec'): + f['vcodec'] = 'avc1' + man_url = f['url'] + if 
re.search(r'chunklist(?:_b\d+)*_ao[_.]', man_url): # audio only + f['vcodec'] = 'none' + elif re.search(r'chunklist(?:_b\d+)*_vo[_.]', man_url): # video only + f['acodec'] = 'none' + else: # video+audio + if f['acodec'] == 'none': + f['acodec'] = 'mp4a' + if f['vcodec'] == 'none': + f['vcodec'] = 'avc1' + + return fmts + + def _extract_relinker_info(self, relinker_url, video_id, audio_only=False): + def fix_cdata(s): + # remove \r\n\t before and after <![CDATA[ ]]> to avoid + # polluted text with xpath_text + s = re.sub(r'(\]\]>)[\r\n\t]+(</)', '\\1\\2', s) + return re.sub(r'(>)[\r\n\t]+(<!\[CDATA\[)', '\\1\\2', s) + + if not re.match(r'https?://', relinker_url): + return {'formats': [{'url': relinker_url}]} + + # set User-Agent to generic 'Rai' to avoid quality filtering from + # the media server and get the maximum qualities available + relinker = self._download_xml( + relinker_url, video_id, note='Downloading XML metadata', + transform_source=fix_cdata, query={'output': 64}, + headers={**self.geo_verification_headers(), 'User-Agent': 'Rai'}) + + if xpath_text(relinker, './license_url', default='{}') != '{}': + self.report_drm(video_id) + + is_live = xpath_text(relinker, './is_live', default='N') == 'Y' + duration = parse_duration(xpath_text(relinker, './duration', default=None)) + media_url = xpath_text(relinker, './url[@type="content"]', default=None) + + if not media_url: + self.raise_no_formats('The relinker returned no media url') + + # geo flag is a bit unreliable and not properly set all the time + geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y' + + ext = determine_ext(media_url) + formats = [] + + if ext == 'mp3': + formats.append({ + 'url': media_url, + 'vcodec': 'none', + 'acodec': 'mp3', + 'format_id': 'https-mp3', + }) + elif ext == 'm3u8' or 'format=m3u8' in media_url: + formats.extend(self._fix_m3u8_formats(media_url, video_id)) + elif ext == 'f4m': + # very likely no longer needed. Cannot find any url that uses it. 
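+            # For reference, the rewrite below turns a relinker URL of the shape
+            #     .../manifest#live_hds.f4m
+            # into
+            #     .../manifest.f4m?hdcore=3.7.0&plugin=aasp-3.7.0.39.44
+            # (illustrative shape only); the hdcore/plugin query parameters
+            # were conventionally expected by Akamai HDS servers.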
+ manifest_url = update_url_query( + media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), + {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) + formats.extend(self._extract_f4m_formats( + manifest_url, video_id, f4m_id='hds', fatal=False)) + elif ext == 'mp4': + bitrate = int_or_none(xpath_text(relinker, './bitrate')) + formats.append({ + 'url': media_url, + 'tbr': bitrate if bitrate > 0 else None, + 'format_id': join_nonempty('https', bitrate, delim='-'), + }) + else: + raise ExtractorError('Unrecognized media file found') + + if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) + + if not audio_only and not is_live: + formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id)) + + return filter_dict({ + 'is_live': is_live, + 'duration': duration, + 'formats': formats, + }) + + def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id): + _MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8' + _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s' + _QUALITY = { + # tbr: w, h + 250: [352, 198], + 400: [512, 288], + 600: [512, 288], + 700: [512, 288], + 800: [700, 394], + 1200: [736, 414], + 1500: [920, 518], + 1800: [1024, 576], + 2400: [1280, 720], + 3200: [1440, 810], + 3600: [1440, 810], + 5000: [1920, 1080], + 10000: [1920, 1080], + } + + def percentage(number, target, pc=20, roof=125): + '''check if the target is in the range of number +/- percent''' + if not number or number < 0: + return False + return abs(target - number) < min(float(number) * float(pc) / 100.0, roof) + + def get_format_info(tbr): + import math + br = int_or_none(tbr) + if len(fmts) == 1 and not br: + br = fmts[0].get('tbr') + if br and br > 300: + tbr = math.floor(br / 100) * 100 + else: + tbr = 250 + + # try extracting info from available m3u8 formats + format_copy = [None, None] + for f in fmts: + if f.get('tbr'): + if percentage(tbr, f['tbr']): + format_copy[0] = f.copy() + if [f.get('width'), f.get('height')] == _QUALITY.get(tbr): + format_copy[1] = f.copy() + format_copy[1]['tbr'] = tbr + + # prefer format with similar bitrate because there might be + # multiple video with the same resolution but different bitrate + format_copy = format_copy[0] or format_copy[1] or {} + return { + 'format_id': f'https-{tbr}', + 'width': format_copy.get('width'), + 'height': format_copy.get('height'), + 'tbr': format_copy.get('tbr') or tbr, + 'vcodec': format_copy.get('vcodec') or 'avc1', + 'acodec': format_copy.get('acodec') or 'mp4a', + 'fps': format_copy.get('fps') or 25, + } if format_copy else { + 'format_id': f'https-{tbr}', + 'width': _QUALITY[tbr][0], + 'height': _QUALITY[tbr][1], + 'tbr': tbr, + 'vcodec': 'avc1', + 'acodec': 'mp4a', + 'fps': 25, + } + + # Check if MP4 download is available + try: + self._request_webpage( + HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability') + except ExtractorError as e: + self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}') + return [] + + # filter out single-stream formats + fmts = [f for f in fmts + if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none'] + + mobj = re.search(_MANIFEST_REG, manifest_url) + if not mobj: + return [] + available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*'] + + formats = [] + for q in filter(None, available_qualities): + self.write_debug(f'Creating https 
format for quality {q}') + formats.append({ + 'url': _MP4_TMPL % (relinker_url, q), + 'protocol': 'https', + 'ext': 'mp4', + **get_format_info(q) + }) + return formats + + @staticmethod + def _get_thumbnails_list(thumbs, url): + return [{ + 'url': urljoin(url, thumb_url), + } for thumb_url in (thumbs or {}).values() if thumb_url] + + @staticmethod + def _extract_subtitles(url, video_data): + STL_EXT = 'stl' + SRT_EXT = 'srt' + subtitles = {} + subtitles_array = video_data.get('subtitlesArray') or video_data.get('subtitleList') or [] + for k in ('subtitles', 'subtitlesUrl'): + subtitles_array.append({'url': video_data.get(k)}) + for subtitle in subtitles_array: + sub_url = subtitle.get('url') + if sub_url and isinstance(sub_url, str): + sub_lang = subtitle.get('language') or 'it' + sub_url = urljoin(url, sub_url) + sub_ext = determine_ext(sub_url, SRT_EXT) + subtitles.setdefault(sub_lang, []).append({ + 'ext': sub_ext, + 'url': sub_url, + }) + if STL_EXT == sub_ext: + subtitles[sub_lang].append({ + 'ext': SRT_EXT, + 'url': sub_url[:-len(STL_EXT)] + SRT_EXT, + }) + return subtitles + + +class RaiPlayIE(RaiBaseIE): + _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' + _TESTS = [{ + 'url': 'https://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'md5': '8970abf8caf8aef4696e7b1f2adfc696', + 'info_dict': { + 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', + 'ext': 'mp4', + 'title': 'Report del 07/04/2014', + 'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014', + 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e', + 'thumbnail': r're:^https?://www\.raiplay\.it/.+\.jpg', + 'uploader': 'Rai 3', + 'creator': 'Rai 3', + 'duration': 6160, + 'series': 'Report', + 'season': '2013/14', + 'subtitles': {'it': 'count:4'}, + 'release_year': 2024, + 'episode': 'Espresso nel caffè - 07/04/2014', + 'timestamp': 1396919880, + 'upload_date': '20140408', + 'formats': 'count:4', + }, + 'params': {'skip_download': True}, + }, { + # 1080p + 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html', + 'md5': 'aeda7243115380b2dd5e881fd42d949a', + 'info_dict': { + 'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736', + 'ext': 'mp4', + 'title': 'Blanca - S1E1 - Senza occhi', + 'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi', + 'description': 'md5:75f95d5c030ec8bac263b1212322e28c', + 'thumbnail': r're:^https://www\.raiplay\.it/dl/img/.+\.jpg', + 'uploader': 'Rai Premium', + 'creator': 'Rai Fiction', + 'duration': 6493, + 'series': 'Blanca', + 'season': 'Season 1', + 'episode_number': 1, + 'release_year': 2021, + 'season_number': 1, + 'episode': 'Senza occhi', + 'timestamp': 1637318940, + 'upload_date': '20211119', + 'formats': 'count:7', + }, + 'params': {'skip_download': True}, + 'expected_warnings': ['Video not available. 
Likely due to geo-restriction.'] + }, { + # 1500 quality + 'url': 'https://www.raiplay.it/video/2012/09/S1E11---Tutto-cio-che-luccica-0cab3323-732e-45d6-8e86-7704acab6598.html', + 'md5': 'a634d20e8ab2d43724c273563f6bf87a', + 'info_dict': { + 'id': '0cab3323-732e-45d6-8e86-7704acab6598', + 'ext': 'mp4', + 'title': 'Mia and Me - S1E11 - Tutto ciò che luccica', + 'alt_title': 'St 1 Ep 11 - Mia and Me - Tutto ciò che luccica', + 'description': 'md5:4969e594184b1920c4c1f2b704da9dea', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Rai Gulp', + 'series': 'Mia and Me', + 'season': 'Season 1', + 'episode_number': 11, + 'release_year': 2015, + 'season_number': 1, + 'episode': 'Tutto ciò che luccica', + 'timestamp': 1348495020, + 'upload_date': '20120924', + }, + }, { + 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', + 'only_matching': True, + }, { + # subtitles at 'subtitlesArray' key (see #27698) + 'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html', + 'only_matching': True, + }, { + # DRM protected + 'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + base, video_id = self._match_valid_url(url).groups() + + media = self._download_json( + f'{base}.json', video_id, 'Downloading video JSON') + + if not self.get_param('allow_unplayable_formats'): + if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')): + self.report_drm(video_id) + + video = media['video'] + relinker_info = self._extract_relinker_info(video['content_url'], video_id) + date_published = join_nonempty( + media.get('date_published'), media.get('time_published'), delim=' ') + season = media.get('season') + alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ') + + return { + 'id': remove_start(media.get('id'), 'ContentItem-') or video_id, + 'display_id': video_id, + 'title': media.get('name'), + 'alt_title': strip_or_none(alt_title or None), + 'description': media.get('description'), + 'uploader': strip_or_none( + traverse_obj(media, ('program_info', 'channel')) + or media.get('channel') or None), + 'creator': strip_or_none( + traverse_obj(media, ('program_info', 'editor')) + or media.get('editor') or None), + 'duration': parse_duration(video.get('duration')), + 'timestamp': unified_timestamp(date_published), + 'thumbnails': self._get_thumbnails_list(media.get('images'), url), + 'series': traverse_obj(media, ('program_info', 'name')), + 'season_number': int_or_none(season), + 'season': season if (season and not season.isdigit()) else None, + 'episode': media.get('episode_title'), + 'episode_number': int_or_none(media.get('episode')), + 'subtitles': self._extract_subtitles(url, video), + 'release_year': int_or_none(traverse_obj(media, ('track_info', 'edit_year'))), + **relinker_info + } + + +class RaiPlayLiveIE(RaiPlayIE): # XXX: Do not subclass from concrete IE + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))' + _TESTS = [{ + 'url': 'http://www.raiplay.it/dirette/rainews24', + 'info_dict': { + 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', + 'display_id': 'rainews24', + 'ext': 'mp4', + 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', + 'uploader': 'Rai News 24', + 
'creator': 'Rai News 24', + 'is_live': True, + 'live_status': 'is_live', + 'upload_date': '20090502', + 'timestamp': 1241276220, + 'formats': 'count:3', + }, + 'params': {'skip_download': True}, + }] + + +class RaiPlayPlaylistIE(InfoExtractor): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' + _TESTS = [{ + # entire series episodes + extras... + 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo', + 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', + }, + 'playlist_mincount': 30, + }, { + # single season + 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo - Stagione 2', + 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', + }, + 'playlist_count': 12, + }] + + def _real_extract(self, url): + base, playlist_id, extra_id = self._match_valid_url(url).groups() + + program = self._download_json( + f'{base}.json', playlist_id, 'Downloading program JSON') + + if extra_id: + extra_id = extra_id.upper().rstrip('/') + + playlist_title = program.get('name') + entries = [] + for b in (program.get('blocks') or []): + for s in (b.get('sets') or []): + if extra_id: + if extra_id != join_nonempty( + b.get('name'), s.get('name'), delim='/').replace(' ', '-').upper(): + continue + playlist_title = join_nonempty(playlist_title, s.get('name'), delim=' - ') + + s_id = s.get('id') + if not s_id: + continue + medias = self._download_json( + f'{base}/{s_id}.json', s_id, + 'Downloading content set JSON', fatal=False) + if not medias: + continue + for m in (medias.get('items') or []): + path_id = m.get('path_id') + if not path_id: + continue + video_url = urljoin(url, path_id) + entries.append(self.url_result( + video_url, ie=RaiPlayIE.ie_key(), + video_id=RaiPlayIE._match_id(video_url))) + + return self.playlist_result( + entries, playlist_id, playlist_title, + try_get(program, lambda x: x['program_info']['description'])) + + +class RaiPlaySoundIE(RaiBaseIE): + _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' + _TESTS = [{ + 'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html', + 'md5': '8970abf8caf8aef4696e7b1f2adfc696', + 'info_dict': { + 'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707', + 'ext': 'mp3', + 'title': 'Il Ruggito del Coniglio del 10/12/2021', + 'alt_title': 'md5:0e6476cd57858bb0f3fcc835d305b455', + 'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2', + 'thumbnail': r're:^https?://.+\.jpg$', + 'uploader': 'rai radio 2', + 'duration': 5685, + 'series': 'Il Ruggito del Coniglio', + 'episode': 'Il Ruggito del Coniglio del 10/12/2021', + 'creator': 'rai radio 2', + 'timestamp': 1638346620, + 'upload_date': '20211201', + }, + 'params': {'skip_download': True}, + }] + + def _real_extract(self, url): + base, audio_id = self._match_valid_url(url).group('base', 'id') + media = self._download_json(f'{base}.json', audio_id, 'Downloading audio JSON') + uid = try_get(media, lambda x: remove_start(remove_start(x['uniquename'], 'ContentItem-'), 'Page-')) + + info = {} + formats = [] + relinkers = set(traverse_obj(media, (('downloadable_audio', 'audio', ('live', 'cards', 0, 'audio')), 'url'))) + for r in relinkers: + info = self._extract_relinker_info(r, audio_id, True) + formats.extend(info.get('formats')) + + 
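+        # The set() of relinkers above collects every result of the branched
+        # traverse_obj path; with single paths it is roughly equivalent to
+        # (a sketch, ignoring traverse_obj's skipping of missing keys):
+        #     traverse_obj(media, ('downloadable_audio', 'url'))
+        #     traverse_obj(media, ('audio', 'url'))
+        #     traverse_obj(media, ('live', 'cards', 0, 'audio', 'url'))
+        # deduplicated into a single set.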
date_published = try_get(media, (lambda x: f'{x["create_date"]} {x.get("create_time") or ""}', + lambda x: x['live']['create_date'])) + + podcast_info = traverse_obj(media, 'podcast_info', ('live', 'cards', 0)) or {} + + return { + **info, + 'id': uid or audio_id, + 'display_id': audio_id, + 'title': traverse_obj(media, 'title', 'episode_title'), + 'alt_title': traverse_obj(media, ('track_info', 'media_name'), expected_type=strip_or_none), + 'description': media.get('description'), + 'uploader': traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none), + 'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none), + 'timestamp': unified_timestamp(date_published), + 'thumbnails': self._get_thumbnails_list(podcast_info.get('images'), url), + 'series': podcast_info.get('title'), + 'season_number': int_or_none(media.get('season')), + 'episode': media.get('episode_title'), + 'episode_number': int_or_none(media.get('episode')), + 'formats': formats, + } + + +class RaiPlaySoundLiveIE(RaiPlaySoundIE): # XXX: Do not subclass from concrete IE + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)' + _TESTS = [{ + 'url': 'https://www.raiplaysound.it/radio2', + 'info_dict': { + 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44', + 'display_id': 'radio2', + 'ext': 'mp4', + 'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+', + 'thumbnail': r're:^https://www\.raiplaysound\.it/dl/img/.+\.png', + 'uploader': 'rai radio 2', + 'series': 'Rai Radio 2', + 'creator': 'raiplaysound', + 'is_live': True, + 'live_status': 'is_live', + }, + 'params': {'skip_download': True}, + }] + + +class RaiPlaySoundPlaylistIE(InfoExtractor): + _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' 
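+    # Per the tests below, this matches both a bare show page, e.g.
+    #     https://www.raiplaysound.it/programmi/ilruggitodelconiglio
+    # (<id> = 'ilruggitodelconiglio'), and a season page, e.g.
+    #     https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995
+    # where the trailing 'puntate/prima-stagione-1995' is captured as
+    # <extra_id> and resolved through the programme's 'filters' metadata.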
+ _TESTS = [{ + # entire show + 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio', + 'info_dict': { + 'id': 'ilruggitodelconiglio', + 'title': 'Il Ruggito del Coniglio', + 'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e', + }, + 'playlist_mincount': 65, + }, { + # single season + 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995', + 'info_dict': { + 'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995', + 'title': 'Prima Stagione 1995', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + base, playlist_id, extra_id = self._match_valid_url(url).group('base', 'id', 'extra_id') + url = f'{base}.json' + program = self._download_json(url, playlist_id, 'Downloading program JSON') + + if extra_id: + extra_id = extra_id.rstrip('/') + playlist_id += '_' + extra_id.replace('/', '_') + path = next(c['path_id'] for c in program.get('filters') or [] if extra_id in c.get('weblink')) + program = self._download_json( + urljoin('https://www.raiplaysound.it', path), playlist_id, 'Downloading program secondary JSON') + + entries = [ + self.url_result(urljoin(base, c['path_id']), ie=RaiPlaySoundIE.ie_key()) + for c in traverse_obj(program, 'cards', ('block', 'cards')) or [] + if c.get('path_id')] + + return self.playlist_result(entries, playlist_id, program.get('title'), + traverse_obj(program, ('podcast_info', 'description'))) + + +class RaiIE(RaiBaseIE): + _VALID_URL = rf'https?://[^/]+\.(?:rai\.(?:it|tv))/.+?-(?P<id>{RaiBaseIE._UUID_RE})(?:-.+?)?\.html' + _TESTS = [{ + 'url': 'https://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', + 'info_dict': { + 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', + 'ext': 'mp4', + 'title': 'TG PRIMO TEMPO', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1758, + 'upload_date': '20140612', + }, + 'params': {'skip_download': True}, + 'expected_warnings': ['Video not available. Likely due to geo-restriction.'] + }, { + 'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', + 'info_dict': { + 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', + 'ext': 'mp4', + 'title': 'TG1 ore 20:00 del 03/11/2016', + 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2214, + 'upload_date': '20161103' + }, + 'params': {'skip_download': True}, + }, { + # Direct MMS: Media URL no longer works. 
+ 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + content_id = self._match_id(url) + media = self._download_json( + f'https://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-{content_id}.html?json', + content_id, 'Downloading video JSON', fatal=False, expected_status=404) + + if media is None: + return None + + if 'Audio' in media['type']: + relinker_info = { + 'formats': [{ + 'format_id': join_nonempty('https', media.get('formatoAudio'), delim='-'), + 'url': media['audioUrl'], + 'ext': media.get('formatoAudio'), + 'vcodec': 'none', + 'acodec': media.get('formatoAudio'), + }] + } + elif 'Video' in media['type']: + relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) + else: + raise ExtractorError('not a media file') + + thumbnails = self._get_thumbnails_list( + {image_type: media.get(image_type) for image_type in ( + 'image', 'image_medium', 'image_300')}, url) + + return { + 'id': content_id, + 'title': strip_or_none(media.get('name') or media.get('title')), + 'description': strip_or_none(media.get('desc')) or None, + 'thumbnails': thumbnails, + 'uploader': strip_or_none(media.get('author')) or None, + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(media.get('length')), + 'subtitles': self._extract_subtitles(url, media), + **relinker_info + } + + +class RaiNewsIE(RaiBaseIE): + _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html' + _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)'] + _TESTS = [{ + # new rainews player (#3911) + 'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html', + 'info_dict': { + 'id': '31e8017c-845c-43f5-9c48-245b43c3a079', + 'ext': 'mp4', + 'title': 'md5:1e81364b09de4a149042bac3c7d36f0b', + 'duration': 196, + 'upload_date': '20240225', + 'uploader': 'rainews', + 'formats': 'count:2', + }, + 'params': {'skip_download': True}, + }, { + # old content with fallback method to extract media urls + 'url': 'https://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', + 'info_dict': { + 'id': '1632c009-c843-4836-bb65-80c33084a64b', + 'ext': 'mp4', + 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"', + 'description': 'I film in uscita questa settimana.', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 833, + 'upload_date': '20161103', + 'formats': 'count:8', + }, + 'params': {'skip_download': True}, + 'expected_warnings': ['unable to extract player_data'], + }, { + # iframe + drm + 'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html', + 'only_matching': True, + }] + _PLAYER_TAG = 'news' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + player_data = self._search_json( + rf'<rai{self._PLAYER_TAG}-player\s*data=\'', webpage, 'player_data', video_id, + transform_source=clean_html, default={}) + track_info = player_data.get('track_info') + relinker_url = traverse_obj(player_data, 'mediapolis', 'content_url') + + if 
not relinker_url: + # fallback on old implementation for some old content + try: + return RaiIE._real_extract(self, url) + except GeoRestrictedError: + raise + except ExtractorError as e: + raise ExtractorError('Relinker URL not found', cause=e) + + relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id) + + return { + 'id': video_id, + 'title': player_data.get('title') or track_info.get('title') or self._og_search_title(webpage), + 'upload_date': unified_strdate(track_info.get('date')), + 'uploader': strip_or_none(track_info.get('editor') or None), + **relinker_info + } + + +class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE + _VALID_URL = rf'https?://(www\.)?raicultura\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html' + _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)'] + _TESTS = [{ + 'url': 'https://www.raicultura.it/letteratura/articoli/2018/12/Alberto-Asor-Rosa-Letteratura-e-potere-05ba8775-82b5-45c5-a89d-dd955fbde1fb.html', + 'info_dict': { + 'id': '05ba8775-82b5-45c5-a89d-dd955fbde1fb', + 'ext': 'mp4', + 'title': 'Alberto Asor Rosa: Letteratura e potere', + 'duration': 1756, + 'upload_date': '20181206', + 'uploader': 'raicultura', + 'formats': 'count:2', + }, + 'params': {'skip_download': True}, + }] + _PLAYER_TAG = 'cultura' + + +class RaiSudtirolIE(RaiBaseIE): + _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)' + _TESTS = [{ + # mp4 file + 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460', + 'info_dict': { + 'id': 'Ptv1619729460', + 'ext': 'mp4', + 'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51', + 'series': 'Euro: trasmisciun d\'economia', + 'upload_date': '20210429', + 'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+\.jpg', + 'uploader': 'raisudtirol', + 'formats': 'count:1', + }, + 'params': {'skip_download': True}, + }, { + # m3u manifest + 'url': 'https://raisudtirol.rai.it/it/kidsplayer.php?lang=it&media=GUGGUG_P1.smil', + 'info_dict': { + 'id': 'GUGGUG_P1', + 'ext': 'mp4', + 'title': 'GUGGUG! 
La Prospettiva - Die Perspektive', + 'uploader': 'raisudtirol', + 'formats': 'count:6', + }, + 'params': {'skip_download': True}, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_date = self._html_search_regex( + r'<span class="med_data">(.+?)</span>', webpage, 'video_date', default=None) + video_title = self._html_search_regex([ + r'<span class="med_title">(.+?)</span>', r'title: \'(.+?)\','], + webpage, 'video_title', default=None) + video_url = self._html_search_regex([ + r'sources:\s*\[\{file:\s*"(.+?)"\}\]', + r'<source\s+src="(.+?)"\s+type="application/x-mpegURL"'], + webpage, 'video_url', default=None) + + ext = determine_ext(video_url) + if ext == 'm3u8': + formats = self._extract_m3u8_formats(video_url, video_id) + elif ext == 'mp4': + formats = [{ + 'format_id': 'https-mp4', + 'url': self._proto_relative_url(video_url), + 'width': 1024, + 'height': 576, + 'fps': 25, + 'vcodec': 'avc1', + 'acodec': 'mp4a', + }] + else: + formats = [] + self.raise_no_formats(f'Unrecognized media file: {video_url}') + + return { + 'id': video_id, + 'title': join_nonempty(video_title, video_date, delim=' - '), + 'series': video_title if video_date else None, + 'upload_date': unified_strdate(video_date), + 'thumbnail': urljoin('https://raisudtirol.rai.it/', self._html_search_regex( + r'image: \'(.+?)\'', webpage, 'video_thumb', default=None)), + 'uploader': 'raisudtirol', + 'formats': formats, + } diff --git a/yt_dlp/extractor/raywenderlich.py b/yt_dlp/extractor/raywenderlich.py new file mode 100644 index 0000000..e0e3c3e --- /dev/null +++ b/yt_dlp/extractor/raywenderlich.py @@ -0,0 +1,177 @@ +import re + +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + merge_dicts, + try_get, + unescapeHTML, + unified_timestamp, + urljoin, +) + + +class RayWenderlichIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + videos\.raywenderlich\.com/courses| + (?:www\.)?raywenderlich\.com + )/ + (?P<course_id>[^/]+)/lessons/(?P<id>\d+) + ''' + + _TESTS = [{ + 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1', + 'info_dict': { + 'id': '248377018', + 'ext': 'mp4', + 'title': 'Introduction', + 'description': 'md5:804d031b3efa9fcb49777d512d74f722', + 'timestamp': 1513906277, + 'upload_date': '20171222', + 'duration': 133, + 'uploader': 'Ray Wenderlich', + 'uploader_id': 'user3304672', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + }, + 'add_ie': [VimeoIE.ie_key()], + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { + 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', + 'only_matching': True, + }] + + @staticmethod + def _extract_video_id(data, lesson_id): + if not data: + return + groups = try_get(data, lambda x: x['groups'], list) or [] + if not groups: + return + for group in groups: + if not isinstance(group, dict): + continue + contents = try_get(data, lambda x: x['contents'], list) or [] + for content in contents: + if not isinstance(content, dict): + continue + ordinal = int_or_none(content.get('ordinal')) + if ordinal != lesson_id: + continue + video_id = content.get('identifier') + if video_id: + return compat_str(video_id) + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + course_id, lesson_id = mobj.group('course_id', 'id') + display_id = '%s/%s' % (course_id, lesson_id) + + webpage = self._download_webpage(url, display_id) 
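+        # A summary of the lookup order implemented below: first try a
+        # data-vimeo-id attribute in the page; failing that, parse the
+        # data-collection JSON for the lesson whose ordinal matches, then
+        # exchange that internal id via the videos API (attaching the CSRF
+        # token when present) for the Vimeo clip's provider_id. Extraction is
+        # finally delegated to VimeoIE with this page's URL smuggled as the
+        # Referer.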
+ + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image', webpage, 'thumbnail') + + if '>Subscribe to unlock' in webpage: + raise ExtractorError( + 'This content is only available for subscribers', + expected=True) + + info = { + 'thumbnail': thumbnail, + } + + vimeo_id = self._search_regex( + r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None) + + if not vimeo_id: + data = self._parse_json( + self._search_regex( + r'data-collection=(["\'])(?P<data>{.+?})\1', webpage, + 'data collection', default='{}', group='data'), + display_id, transform_source=unescapeHTML, fatal=False) + video_id = self._extract_video_id( + data, lesson_id) or self._search_regex( + r'/videos/(\d+)/', thumbnail, 'video id') + headers = { + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + } + csrf_token = self._html_search_meta( + 'csrf-token', webpage, 'csrf token', default=None) + if csrf_token: + headers['X-CSRF-Token'] = csrf_token + video = self._download_json( + 'https://videos.raywenderlich.com/api/v1/videos/%s.json' + % video_id, display_id, headers=headers)['video'] + vimeo_id = video['clips'][0]['provider_id'] + info.update({ + '_type': 'url_transparent', + 'title': video.get('name'), + 'description': video.get('description') or video.get( + 'meta_description'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('created_at')), + }) + + return merge_dicts(info, self.url_result( + VimeoIE._smuggle_referrer( + 'https://player.vimeo.com/video/%s' % vimeo_id, url), + ie=VimeoIE.ie_key(), video_id=vimeo_id)) + + +class RayWenderlichCourseIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + videos\.raywenderlich\.com/courses| + (?:www\.)?raywenderlich\.com + )/ + (?P<id>[^/]+) + ''' + + _TEST = { + 'url': 'https://www.raywenderlich.com/3530-testing-in-ios', + 'info_dict': { + 'title': 'Testing in iOS', + 'id': '3530-testing-in-ios', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 29, + } + + @classmethod + def suitable(cls, url): + return False if RayWenderlichIE.suitable(url) else super( + RayWenderlichCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_id = self._match_id(url) + + webpage = self._download_webpage(url, course_id) + + entries = [] + lesson_urls = set() + for lesson_url in re.findall( + r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage): + if lesson_url in lesson_urls: + continue + lesson_urls.add(lesson_url) + entries.append(self.url_result( + urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key())) + + title = self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', default=None) + + return self.playlist_result(entries, course_id, title) diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py new file mode 100644 index 0000000..54f194c --- /dev/null +++ b/yt_dlp/extractor/rbgtum.py @@ -0,0 +1,142 @@ +import re + +from .common import InfoExtractor +from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError + + +class RbgTumIE(InfoExtractor): + _VALID_URL = r'https?://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)' + _TESTS = [{ + # Combined view + 'url': 'https://live.rbg.tum.de/w/cpp/22128', + 'md5': '53a5e7b3e07128e33bbf36687fe1c08f', + 'info_dict': { + 'id': 'cpp/22128', + 'ext': 'mp4', + 'title': 'Lecture: October 18. 
2022', + 'series': 'Concepts of C++ programming (IN2377)', + } + }, { + # Presentation only + 'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES', + 'md5': '36c584272179f3e56b0db5d880639cba', + 'info_dict': { + 'id': 'I2DL/12349/PRES', + 'ext': 'mp4', + 'title': 'Lecture 3: Introduction to Neural Networks', + 'series': 'Introduction to Deep Learning (IN2346)', + } + }, { + # Camera only + 'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM', + 'md5': 'e04189d92ff2f56aedf5cede65d37aad', + 'info_dict': { + 'id': 'fvv-info/16130/CAM', + 'ext': 'mp4', + 'title': 'Fachschaftsvollversammlung', + 'series': 'Fachschaftsvollversammlung Informatik', + } + }, { + 'url': 'https://tum.live/w/linalginfo/27102', + 'only_matching': True, + }, ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8') + lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False) + lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ') + + formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + + return { + 'id': video_id, + 'title': lecture_title, + 'series': lecture_series_title, + 'formats': formats, + } + + +class RbgTumCourseIE(InfoExtractor): + _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))' + _TESTS = [{ + 'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv', + 'info_dict': { + 'title': 'Funktionale Programmierung und Verifikation (IN0003)', + 'id': '2022/S/fpv', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 13, + }, { + 'url': 'https://live.rbg.tum.de/old/course/2022/W/set', + 'info_dict': { + 'title': 'SET FSMPIC', + 'id': '2022/W/set', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 6, + }, { + 'url': 'https://tum.live/old/course/2023/S/linalginfo', + 'only_matching': True, + }, ] + + def _real_extract(self, url): + course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug') + meta = self._download_json( + f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False, + query={'year': year, 'term': term}) or {} + lecture_series_title = meta.get('Name') + lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE) + for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))] + + if not lectures: + webpage = self._download_webpage(url, course_id) + lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ') + lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE) + for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)] + + return self.playlist_result(lectures, course_id, lecture_series_title) + + +class RbgTumNewCourseIE(InfoExtractor): + _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?' 
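+    # This matches the newer query-style course pages, e.g.
+    #     https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3
+    # validates the year/term/slug query parameters, and re-routes to the
+    # equivalent old-style URL (https://live.rbg.tum.de/old/course/2022/S/fpv)
+    # handled by RbgTumCourseIE.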
+ _TESTS = [{ + 'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3', + 'info_dict': { + 'title': 'Funktionale Programmierung und Verifikation (IN0003)', + 'id': '2022/S/fpv', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 13, + }, { + 'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3', + 'info_dict': { + 'title': 'SET FSMPIC', + 'id': '2022/W/set', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 6, + }, { + 'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3', + 'only_matching': True, + }] + + def _real_extract(self, url): + query = parse_qs(url) + errors = [key for key in ('year', 'term', 'slug') if not query.get(key)] + if errors: + raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}') + year, term, slug = query['year'][0], query['term'][0], query['slug'][0] + hostname = self._match_valid_url(url).group('hostname') + + return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE) diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py new file mode 100644 index 0000000..b865f63 --- /dev/null +++ b/yt_dlp/extractor/rcs.py @@ -0,0 +1,372 @@ +import re + +from .common import InfoExtractor +from ..networking import HEADRequest +from ..utils import ( + ExtractorError, + base_url, + clean_html, + extract_attributes, + get_element_html_by_class, + get_element_html_by_id, + int_or_none, + js_to_json, + mimetype2ext, + sanitize_url, + traverse_obj, + try_call, + url_basename, + urljoin, +) + + +class RCSBaseIE(InfoExtractor): + # based on VideoPlayerLoader.prototype.getVideoSrc + # and VideoPlayerLoader.prototype.transformSrc from + # https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs + _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' + _RCS_ID_RE = r'[\w-]+-\d{10}' + _MIGRATION_MAP = { + 'videoamica-vh.akamaihd': 'amica', + 'media2-amica-it.akamaized': 'amica', + 'corrierevam-vh.akamaihd': 'corriere', + 'media2vam-corriere-it.akamaized': 'corriere', + 'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno', + 'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno', + 'corveneto-vh.akamaihd': 'corrieredelveneto', + 'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto', + 'corbologna-vh.akamaihd': 'corrieredibologna', + 'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna', + 'corfiorentino-vh.akamaihd': 'corrierefiorentino', + 'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino', + 'corinnovazione-vh.akamaihd': 'corriereinnovazione', + 'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet', + 'videogazzanet-vh.akamaihd': 'gazzanet', + 'videogazzaworld-vh.akamaihd': 'gazzaworld', + 'gazzettavam-vh.akamaihd': 'gazzetta', + 'media2vam-gazzetta-it.akamaized': 'gazzetta', + 'videoiodonna-vh.akamaihd': 'iodonna', + 'media2-leitv-it.akamaized': 'leitv', + 'videoleitv-vh.akamaihd': 'leitv', + 'videoliving-vh.akamaihd': 'living', + 'media2-living-corriere-it.akamaized': 'living', + 'media2-oggi-it.akamaized': 'oggi', + 'videooggi-vh.akamaihd': 'oggi', + 'media2-quimamme-it.akamaized': 'quimamme', + 'quimamme-vh.akamaihd': 'quimamme', + 'videorunning-vh.akamaihd': 'running', + 'media2-style-corriere-it.akamaized': 'style', + 'style-vh.akamaihd': 'style', + 'videostyle-vh.akamaihd': 'style', + 'media2-stylepiccoli-it.akamaized': 'stylepiccoli', + 'stylepiccoli-vh.akamaihd': 'stylepiccoli', + 'doveviaggi-vh.akamaihd': 'viaggi', + 'media2-doveviaggi-it.akamaized': 'viaggi', + 
'media2-vivimilano-corriere-it.akamaized': 'vivimilano', + 'vivimilano-vh.akamaihd': 'vivimilano', + 'media2-youreporter-it.akamaized': 'youreporter' + } + + def _get_video_src(self, video): + for source in traverse_obj(video, ( + 'mediaProfile', 'mediaFile', lambda _, v: v.get('mimeType'))): + url = source['value'] + for s, r in ( + ('media2vam.corriere.it.edgesuite.net', 'media2vam-corriere-it.akamaized.net'), + ('media.youreporter.it.edgesuite.net', 'media-youreporter-it.akamaized.net'), + ('corrierepmd.corriere.it.edgesuite.net', 'corrierepmd-corriere-it.akamaized.net'), + ('media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/', 'video.corriere.it/vr360/videos/'), + ('http://', 'https://'), + ): + url = url.replace(s, r) + + type_ = mimetype2ext(source['mimeType']) + if type_ == 'm3u8' and '-vh.akamaihd' in url: + # still needed for some old content: see _TESTS #3 + matches = re.search(r'(?:https?:)?//(?P<host>[\w\.\-]+)\.net/i(?P<path>.+)$', url) + if matches: + url = f'https://vod.rcsobjects.it/hls/{self._MIGRATION_MAP[matches.group("host")]}{matches.group("path")}' + if traverse_obj(video, ('mediaProfile', 'geoblocking')) or ( + type_ == 'm3u8' and 'fcs.quotidiani_!' in url): + url = url.replace('vod.rcsobjects', 'vod-it.rcsobjects') + if type_ == 'm3u8' and 'vod' in url: + url = url.replace('.csmil', '.urlset') + if type_ == 'mp3': + url = url.replace('media2vam-corriere-it.akamaized.net', 'vod.rcsobjects.it/corriere') + + yield { + 'type': type_, + 'url': url, + 'bitrate': source.get('bitrate') + } + + def _create_http_formats(self, m3u8_formats, video_id): + for f in m3u8_formats: + if f['vcodec'] == 'none': + continue + http_url = re.sub(r'(https?://[^/]+)/hls/([^?#]+?\.mp4).+', r'\g<1>/\g<2>', f['url']) + if http_url == f['url']: + continue + + http_f = f.copy() + del http_f['manifest_url'] + format_id = try_call(lambda: http_f['format_id'].replace('hls-', 'https-')) + urlh = self._request_webpage(HEADRequest(http_url), video_id, fatal=False, + note=f'Check filesize for {format_id}') + if not urlh: + continue + + http_f.update({ + 'format_id': format_id, + 'url': http_url, + 'protocol': 'https', + 'filesize_approx': int_or_none(urlh.headers.get('Content-Length', None)), + }) + yield http_f + + def _create_formats(self, sources, video_id): + for source in sources: + if source['type'] == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + source['url'], video_id, 'mp4', m3u8_id='hls', fatal=False) + yield from m3u8_formats + yield from self._create_http_formats(m3u8_formats, video_id) + elif source['type'] == 'mp3': + yield { + 'format_id': 'https-mp3', + 'ext': 'mp3', + 'acodec': 'mp3', + 'vcodec': 'none', + 'abr': source.get('bitrate'), + 'url': source['url'], + } + + def _real_extract(self, url): + cdn, video_id = self._match_valid_url(url).group('cdn', 'id') + display_id, video_data = None, None + + if re.match(self._UUID_RE, video_id) or re.match(self._RCS_ID_RE, video_id): + url = f'https://video.{cdn}/video-json/{video_id}' + else: + webpage = self._download_webpage(url, video_id) + data_config = get_element_html_by_id('divVideoPlayer', webpage) or get_element_html_by_class('divVideoPlayer', webpage) + + if data_config: + data_config = self._parse_json( + extract_attributes(data_config).get('data-config'), + video_id, fatal=False) or {} + if data_config.get('newspaper'): + cdn = f'{data_config["newspaper"]}.it' + display_id, video_id = video_id, data_config.get('uuid') or video_id + url = f'https://video.{cdn}/video-json/{video_id}' + else: + json_url = 
self._search_regex( + r'''(?x)url\s*=\s*(["']) + (?P<url> + (?:https?:)?//video\.rcs\.it + /fragment-includes/video-includes/[^"']+?\.json + )\1;''', + webpage, video_id, group='url', default=None) + if json_url: + video_data = self._download_json(sanitize_url(json_url, scheme='https'), video_id) + display_id, video_id = video_id, video_data.get('id') or video_id + + if not video_data: + webpage = self._download_webpage(url, video_id) + + video_data = self._search_json( + '##start-video##', webpage, 'video data', video_id, default=None, + end_pattern='##end-video##', transform_source=js_to_json) + + if not video_data: + # try search for iframes + emb = RCSEmbedsIE._extract_url(webpage) + if emb: + return { + '_type': 'url_transparent', + 'url': emb, + 'ie_key': RCSEmbedsIE.ie_key() + } + + if not video_data: + raise ExtractorError('Video data not found in the page') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': video_data.get('title'), + 'description': (clean_html(video_data.get('description')) + or clean_html(video_data.get('htmlDescription')) + or self._html_search_meta('description', webpage)), + 'uploader': video_data.get('provider') or cdn, + 'formats': list(self._create_formats(self._get_video_src(video_data), video_id)), + } + + +class RCSEmbedsIE(RCSBaseIE): + _VALID_URL = r'''(?x) + https?://(?P<vid>video)\. + (?P<cdn> + (?: + rcs| + (?:corriere\w+\.)?corriere| + (?:gazzanet\.)?gazzetta + )\.it) + /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)''' + _EMBED_REGEX = [r'''(?x) + (?: + data-frame-src=| + <iframe[^\n]+src= + ) + (["']) + (?P<url>(?:https?:)?//video\. + (?: + rcs| + (?:corriere\w+\.)?corriere| + (?:gazzanet\.)?gazzetta + ) + \.it/video-embed/.+?) + \1'''] + _TESTS = [{ + 'url': 'https://video.rcs.it/video-embed/iodonna-0001585037', + 'md5': '0faca97df525032bb9847f690bc3720c', + 'info_dict': { + 'id': 'iodonna-0001585037', + 'ext': 'mp4', + 'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"', + 'description': 'md5:65b09633df9ffee57f48b39e34c9e067', + 'uploader': 'rcs.it', + } + }, { + 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', + 'only_matching': True + }, { + 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', + 'only_matching': True + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/', + 'info_dict': { + 'id': 'iodonna-0002033648', + 'ext': 'mp4', + 'title': 'Monica Bellucci: «Più del lavoro, oggi per me sono importanti l\'amicizia e la famiglia»', + 'description': 'md5:daea6d9837351e56b1ab615c06bebac1', + 'uploader': 'rcs.it', + } + }] + + @staticmethod + def _sanitize_url(url): + url = sanitize_url(url, scheme='https') + return urljoin(base_url(url), url_basename(url)) + + @classmethod + def _extract_embed_urls(cls, url, webpage): + return map(cls._sanitize_url, super()._extract_embed_urls(url, webpage)) + + +class RCSIE(RCSBaseIE): + _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\. + (?P<cdn> + (?: + corrieredelmezzogiorno\. + |corrieredelveneto\. + |corrieredibologna\. + |corrierefiorentino\. 
+ )?corriere\.it + |(?:gazzanet\.)?gazzetta\.it) + /(?!video-embed/)[^?#]+?/(?P<id>[^/\?]+)(?=\?|/$|$)''' + _TESTS = [{ + # json iframe directly from id + 'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb', + 'md5': '14946840dec46ecfddf66ba4eea7d2b2', + 'info_dict': { + 'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb', + 'ext': 'mp4', + 'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante', + 'description': 'md5:3915ce5ebb3d2571deb69a5eb85ac9b5', + 'uploader': 'Corriere Tv', + } + }, { + # search for video id inside the page + 'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/', + 'md5': 'f22a92d9e666e80f2fffbf2825359c81', + 'info_dict': { + 'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2', + 'display_id': 'norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen', + 'ext': 'mp4', + 'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen', + 'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8', + 'uploader': 'DOVE Viaggi', + } + }, { + # only audio format https://github.com/yt-dlp/yt-dlp/issues/5683 + 'url': 'https://video.corriere.it/cronaca/audio-telefonata-il-papa-becciu-santita-lettera-che-mi-ha-inviato-condanna/b94c0d20-70c2-11ed-9572-e4b947a0ebd2', + 'md5': 'aaffb08d02f2ce4292a4654694c78150', + 'info_dict': { + 'id': 'b94c0d20-70c2-11ed-9572-e4b947a0ebd2', + 'ext': 'mp3', + 'title': 'L\'audio della telefonata tra il Papa e Becciu: «Santità, la lettera che mi ha inviato è una condanna»', + 'description': 'md5:c0ddb61bd94a8d4e0d4bb9cda50a689b', + 'uploader': 'Corriere Tv', + 'formats': [{'format_id': 'https-mp3', 'ext': 'mp3'}], + } + }, { + # old content still needs cdn migration + 'url': 'https://viaggi.corriere.it/video/milano-varallo-sesia-sul-treno-a-vapore/', + 'md5': '2dfdce7af249654ad27eeba03fe1e08d', + 'info_dict': { + 'id': 'd8f6c8d0-f7d7-11e8-bfca-f74cf4634191', + 'display_id': 'milano-varallo-sesia-sul-treno-a-vapore', + 'ext': 'mp4', + 'title': 'Milano-Varallo Sesia sul treno a vapore', + 'description': 'md5:6348f47aac230397fe341a74f7678d53', + 'uploader': 'DOVE Viaggi', + } + }, { + 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', + 'only_matching': True + }] + + +class RCSVariousIE(RCSBaseIE): + _VALID_URL = r'''(?x)https?://www\. 
+ (?P<cdn> + leitv\.it| + youreporter\.it| + amica\.it + )/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)''' + _TESTS = [{ + 'url': 'https://www.leitv.it/benessere/mal-di-testa/', + 'md5': '3b7a683d105a7313ec7513b014443631', + 'info_dict': { + 'id': 'leitv-0000125151', + 'display_id': 'mal-di-testa', + 'ext': 'mp4', + 'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto', + 'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5', + 'uploader': 'leitv.it', + } + }, { + 'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/', + 'md5': '3989b6d603482611a2abd2f32b79f739', + 'info_dict': { + 'id': 'youreporter-0000332574', + 'display_id': 'fiume-sesia-3-ottobre-2020', + 'ext': 'mp4', + 'title': 'Fiume Sesia 3 ottobre 2020', + 'description': 'md5:0070eef1cc884d13c970a4125063de55', + 'uploader': 'youreporter.it', + } + }, { + 'url': 'https://www.amica.it/video-post/saint-omer-al-cinema-il-film-leone-dargento-che-ribalta-gli-stereotipi/', + 'md5': '187cce524dfd0343c95646c047375fc4', + 'info_dict': { + 'id': 'amica-0001225365', + 'display_id': 'saint-omer-al-cinema-il-film-leone-dargento-che-ribalta-gli-stereotipi', + 'ext': 'mp4', + 'title': '"Saint Omer": al cinema il film Leone d\'argento che ribalta gli stereotipi', + 'description': 'md5:b1c8869c2dcfd6073a2a311ba0008aa8', + 'uploader': 'rcs.it', + } + }] diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py new file mode 100644 index 0000000..6a7c7f3 --- /dev/null +++ b/yt_dlp/extractor/rcti.py @@ -0,0 +1,373 @@ +import json +import random +import time + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + dict_get, + ExtractorError, + strip_or_none, + traverse_obj, + try_get +) + + +class RCTIPlusBaseIE(InfoExtractor): + def _real_initialize(self): + self._AUTH_KEY = self._download_json( + 'https://api.rctiplus.com/api/v1/visitor?platform=web', # platform can be web, mweb, android, ios + None, 'Fetching authorization key')['data']['access_token'] + + def _call_api(self, url, video_id, note=None): + json = self._download_json( + url, video_id, note=note, headers={'Authorization': self._AUTH_KEY}) + if json.get('status', {}).get('code', 0) != 0: + raise ExtractorError(f'{self.IE_NAME} said: {json["status"]["message_client"]}', cause=json) + return json.get('data'), json.get('meta') + + +class RCTIPlusIE(RCTIPlusBaseIE): + _VALID_URL = r'https?://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola', + 'md5': '56ed45affad45fa18d5592a1bc199997', + 'info_dict': { + 'id': 'v_e22124', + 'title': 'Untuk Lola', + 'display_id': 'untuk-lola', + 'description': 'md5:2b809075c0b1e071e228ad6d13e41deb', + 'ext': 'mp4', + 'duration': 1400, + 'timestamp': 1615978800, + 'upload_date': '20210317', + 'series': 'Kiko : Untuk Lola', + 'season_number': 1, + 'episode_number': 1, + 'channel': 'RCTI', + }, + 'params': { + 'fixup': 'never', + }, + }, { # Clip; Series title doesn't appear on metadata JSON + 'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish', + 'md5': 'd179b2ff356f0e91a53bcc6a4d8504f0', + 'info_dict': { + 'id': 'v_c3921', + 'title': 'Make A Wish', + 'display_id': 'make-a-wish', + 'description': 'Make A Wish', + 'ext': 'mp4', + 'duration': 288, + 'timestamp': 1571652600, + 'upload_date': '20191021', + 'series': 'Cahaya Terindah', + 'channel': 'RCTI', + }, + 
'params': { + 'fixup': 'never', + }, + }, { # Extra + 'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden', + 'md5': 'c48106afdbce609749f5e0c007d9278a', + 'info_dict': { + 'id': 'v_ex9438', + 'title': 'md5:2ede828c0f8bde249e0912be150314ca', + 'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933', + 'description': 'md5:2ede828c0f8bde249e0912be150314ca', + 'ext': 'mp4', + 'duration': 93, + 'timestamp': 1587561540, + 'upload_date': '20200422', + 'series': 'iNews Malam', + 'channel': 'INews', + }, + }, { # Missed event/replay + 'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib', + 'md5': '649c5f27250faed1452ca8b91e06922d', + 'info_dict': { + 'id': 'v_pe2507', + 'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB', + 'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib', + 'ext': 'mp4', + 'timestamp': 1627142400, + 'upload_date': '20210724', + 'was_live': True, + 'release_timestamp': 1627369200, + }, + 'params': { + 'fixup': 'never', + }, + }, { # Live event; Cloudfront CDN + 'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', + 'info_dict': { + 'id': 'v_le2530', + 'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB', + 'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', + 'ext': 'mp4', + 'timestamp': 1627898400, + 'upload_date': '20210802', + 'release_timestamp': 1628067600, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'This live event has ended.', + }, { # TV; live_at is null + 'url': 'https://www.rctiplus.com/live-event/1/rcti', + 'info_dict': { + 'id': 'v_lt1', + 'title': 'RCTI', + 'display_id': 'rcti', + 'ext': 'mp4', + 'timestamp': 1546344000, + 'upload_date': '20190101', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + }] + _CONVIVA_JSON_TEMPLATE = { + 't': 'CwsSessionHb', + 'cid': 'ff84ae928c3b33064b76dec08f12500465e59a6f', + 'clid': '0', + 'sid': 0, + 'seq': 0, + 'caps': 0, + 'sf': 7, + 'sdk': True, + } + + def _real_extract(self, url): + match = self._match_valid_url(url).groupdict() + video_type, video_id, display_id = match['type'], match['id'], match['display_id'] + + url_api_version = 'v2' if video_type == 'missed-event' else 'v1' + appier_id = '23984824_' + str(random.randint(0, 10000000000)) # Based on the webpage's uuidRandom generator + video_json = self._call_api( + f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0] + video_url = video_json['url'] + + is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at']) + if is_upcoming is None: + is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date']) + if is_upcoming: + self.raise_no_formats( + 'This event will start at %s.' 
% video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True) + if 'akamaized' in video_url: + # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API + conviva_json_data = { + **self._CONVIVA_JSON_TEMPLATE, + 'url': video_url, + 'sst': int(time.time()) + } + conviva_json_res = self._download_json( + 'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id, + 'Creating Conviva session', 'Failed to create Conviva session', + fatal=False, data=json.dumps(conviva_json_data).encode('utf-8')) + if conviva_json_res and conviva_json_res.get('err') != 'ok': + self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err'))) + + video_meta, meta_paths = self._call_api( + 'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata') + + thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/') + if video_meta.get('portrait_image'): + thumbnails.append({ + 'id': 'portrait_image', + 'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image']) # 2000px seems to be the highest resolution that can be given + }) + if video_meta.get('landscape_image'): + thumbnails.append({ + 'id': 'landscape_image', + 'url': '%s%d%s' % (image_path, 2000, video_meta['landscape_image']) + }) + try: + formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'}) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + self.raise_geo_restricted(countries=['ID'], metadata_available=True) + else: + raise e + for f in formats: + if 'akamaized' in f['url'] or 'cloudfront' in f['url']: + f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs + + return { + 'id': video_meta.get('product_id') or video_json.get('product_id'), + 'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')), + 'display_id': display_id, + 'description': video_meta.get('summary'), + 'timestamp': video_meta.get('release_date') or video_json.get('start_date'), + 'duration': video_meta.get('duration'), + 'categories': [video_meta['genre']] if video_meta.get('genre') else None, + 'average_rating': video_meta.get('star_rating'), + 'series': video_meta.get('program_title') or video_json.get('program_title'), + 'season_number': video_meta.get('season'), + 'episode_number': video_meta.get('episode'), + 'channel': video_json.get('tv_name'), + 'channel_id': video_json.get('tv_id'), + 'formats': formats, + 'thumbnails': thumbnails, + 'is_live': video_type == 'live-event' and not is_upcoming, + 'was_live': video_type == 'missed-event', + 'live_status': 'is_upcoming' if is_upcoming else None, + 'release_timestamp': video_json.get('live_at'), + } + + +class RCTIPlusSeriesIE(RCTIPlusBaseIE): + _VALID_URL = r'https?://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?' 
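+    # An optional trailing /episodes, /extras or /clips segment limits the
+    # playlist to that content type; without it, _series_entries fetches all
+    # three. The API endpoints are paginated (&length=20&page=N): _entries
+    # reads pagination.total_page from page 0, then walks pages 1..N.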
+ _TESTS = [{ + 'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran', + 'playlist_mincount': 1019, + 'info_dict': { + 'id': '829', + 'title': 'Putri Untuk Pangeran', + 'description': 'md5:aca7b54d05bd95a67d4f4613cc1d622d', + 'age_limit': 2, + 'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'], + 'display_id': 'putri-untuk-pangeran', + 'tags': 'count:18', + }, + }, { # No episodes + 'url': 'https://www.rctiplus.com/programs/615/inews-pagi', + 'playlist_mincount': 388, + 'info_dict': { + 'id': '615', + 'title': 'iNews Pagi', + 'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04', + 'age_limit': 2, + 'tags': 'count:11', + 'display_id': 'inews-pagi', + } + }] + _AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings + 'S-SU': 2, + 'SU': 2, + 'P': 2, + 'A': 7, + 'R': 13, + 'R-R/1': 17, # Labelled as 17+ despite being R + 'D': 18, + } + + @classmethod + def suitable(cls, url): + return False if RCTIPlusIE.suitable(url) else super(RCTIPlusSeriesIE, cls).suitable(url) + + def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}): + total_pages = 0 + try: + total_pages = self._call_api( + '%s&length=20&page=0' % url, + display_id, note)[1]['pagination']['total_page'] + except ExtractorError as e: + if 'not found' in str(e): + return [] + raise e + if total_pages <= 0: + return [] + + for page_num in range(1, total_pages + 1): + episode_list = self._call_api( + '%s&length=20&page=%s' % (url, page_num), + display_id, '%s page %s' % (note, page_num))[0] or [] + + for video_json in episode_list: + yield { + '_type': 'url', + 'url': video_json['share_link'], + 'ie_key': RCTIPlusIE.ie_key(), + 'id': video_json.get('product_id'), + 'title': video_json.get('title'), + 'display_id': video_json.get('title_code').replace('_', '-'), + 'description': video_json.get('summary'), + 'timestamp': video_json.get('release_date'), + 'duration': video_json.get('duration'), + 'season_number': video_json.get('season'), + 'episode_number': video_json.get('episode'), + **metadata + } + + def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}): + if not video_type or video_type in 'episodes': + try: + seasons_list = self._call_api( + f'https://api.rctiplus.com/api/v1/program/{series_id}/season', + display_id, 'Downloading seasons list JSON')[0] + except ExtractorError as e: + if 'not found' not in str(e): + raise + seasons_list = [] + for season in seasons_list: + yield from self._entries( + f'https://api.rctiplus.com/api/v2/program/{series_id}/episode?season={season["season"]}', + display_id, f'Downloading season {season["season"]} episode entries', metadata) + if not video_type or video_type in 'extras': + yield from self._entries( + f'https://api.rctiplus.com/api/v2/program/{series_id}/extra?content_id=0', + display_id, 'Downloading extra entries', metadata) + if not video_type or video_type in 'clips': + yield from self._entries( + f'https://api.rctiplus.com/api/v2/program/{series_id}/clip?content_id=0', + display_id, 'Downloading clip entries', metadata) + + def _real_extract(self, url): + series_id, display_id, video_type = self._match_valid_url(url).group('id', 'display_id', 'type') + if video_type: + self.report_warning( + f'Only {video_type} will be downloaded. 
' + f'To download everything from the series, remove "/{video_type}" from the URL') + + series_meta, meta_paths = self._call_api( + f'https://api.rctiplus.com/api/v1/program/{series_id}/detail', display_id, 'Downloading series metadata') + metadata = { + 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]), + 'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'), + expected_type=lambda x: strip_or_none(x) or None), + 'tags': traverse_obj(series_meta, ('tag', ..., 'name'), + expected_type=lambda x: strip_or_none(x) or None), + } + return self.playlist_result( + self._series_entries(series_id, display_id, video_type, metadata), series_id, + series_meta.get('title'), series_meta.get('summary'), display_id=display_id, **metadata) + + +class RCTIPlusTVIE(RCTIPlusBaseIE): + _VALID_URL = r'https?://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))' + _TESTS = [{ + 'url': 'https://www.rctiplus.com/tv/rcti', + 'info_dict': { + 'id': 'v_lt1', + 'title': 'RCTI', + 'ext': 'mp4', + 'timestamp': 1546344000, + 'upload_date': '20190101', + }, + 'params': { + 'skip_download': True, + } + }, { + # Returned video will always change + 'url': 'https://www.rctiplus.com/live-event', + 'only_matching': True, + }, { + # Returned video will also always change + 'url': 'https://www.rctiplus.com/missed-event', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if RCTIPlusIE.suitable(url) else super(RCTIPlusTVIE, cls).suitable(url) + + def _real_extract(self, url): + match = self._match_valid_url(url).groupdict() + tv_id = match.get('tvname') or match.get('eventname') + webpage = self._download_webpage(url, tv_id) + video_type, video_id = self._search_regex( + r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url', + webpage, 'video link', group=('type', 'id')) + return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus') diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py new file mode 100644 index 0000000..1a1c663 --- /dev/null +++ b/yt_dlp/extractor/rds.py @@ -0,0 +1,68 @@ +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, + js_to_json, +) +from ..compat import compat_str + + +class RDSIE(InfoExtractor): + _WORKING = False + IE_DESC = 'RDS.ca' + _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+' + + _TESTS = [{ + # has two 9c9media ContentPackages, the web player selects the first ContentPackage + 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606', + 'info_dict': { + 'id': '2083309', + 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande', + 'ext': 'flv', + 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande', + 'description': 'md5:83fa38ecc4a79b19e433433254077f25', + 'timestamp': 1606129030, + 'upload_date': '20201123', + 'duration': 773.039, + } + }, { + 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) + video_id = compat_str(item['id']) + title = item.get('title') or self._og_search_title(webpage) or 
self._html_search_meta( + 'title', webpage, 'title', fatal=True) + description = self._og_search_description(webpage) or self._html_search_meta( + 'description', webpage, 'description') + thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( + [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', + r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], + webpage, 'thumbnail', fatal=False) + timestamp = parse_iso8601(self._search_regex( + r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"', + webpage, 'upload date', fatal=False)) + duration = parse_duration(self._search_regex( + r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"', + webpage, 'duration', fatal=False)) + age_limit = self._family_friendly_search(webpage) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'display_id': display_id, + 'url': '9c9media:rds_web:%s' % video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'age_limit': age_limit, + 'ie_key': 'NineCNineMedia', + } diff --git a/yt_dlp/extractor/redbee.py b/yt_dlp/extractor/redbee.py new file mode 100644 index 0000000..4d71133 --- /dev/null +++ b/yt_dlp/extractor/redbee.py @@ -0,0 +1,380 @@ +import json +import re +import time +import urllib.parse +import uuid + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + strip_or_none, + traverse_obj, + try_call, + unified_timestamp, +) + + +class RedBeeBaseIE(InfoExtractor): + _DEVICE_ID = str(uuid.uuid4()) + + @property + def _API_URL(self): + """ + Ref: https://apidocs.emp.ebsd.ericsson.net + Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT + """ + return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}' + + def _get_bearer_token(self, asset_id, jwt=None): + request = { + 'deviceId': self._DEVICE_ID, + 'device': { + 'deviceId': self._DEVICE_ID, + 'name': 'Mozilla Firefox 102', + 'type': 'WEB', + }, + } + if jwt: + request['jwt'] = jwt + + return self._download_json( + f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}', + asset_id, data=json.dumps(request).encode('utf-8'), headers={ + 'Content-Type': 'application/json;charset=utf-8' + })['sessionToken'] + + def _get_formats_and_subtitles(self, asset_id, **kwargs): + bearer_token = self._get_bearer_token(asset_id, **kwargs) + api_response = self._download_json( + f'{self._API_URL}/entitlement/{asset_id}/play', + asset_id, headers={ + 'Authorization': f'Bearer {bearer_token}', + 'Accept': 'application/json, text/plain, */*' + }) + + formats, subtitles = [], {} + for format in api_response['formats']: + if not format.get('mediaLocator'): + continue + + fmts, subs = [], {} + if format.get('format') == 'DASH': + fmts, subs = self._extract_mpd_formats_and_subtitles( + format['mediaLocator'], asset_id, fatal=False) + elif format.get('format') == 'SMOOTHSTREAMING': + fmts, subs = self._extract_ism_formats_and_subtitles( + format['mediaLocator'], asset_id, fatal=False) + elif format.get('format') == 'HLS': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + format['mediaLocator'], asset_id, fatal=False) + + if format.get('drm'): + for f in fmts: + f['has_drm'] = True + + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return formats, subtitles + + +class ParliamentLiveUKIE(RedBeeBaseIE): + IE_NAME = 'parliamentlive.tv' + IE_DESC = 'UK parliament videos' + 
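+    # Streams are served through Red Bee Media's "exposure" API, handled by
+    # RedBeeBaseIE above; only the customer/business-unit constants below
+    # differ per site. Title and timestamp metadata is fetched separately
+    # from parliamentlive.tv's own GetShareVideo endpoint.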
_VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + + _REDBEE_CUSTOMER = 'UKParliament' + _REDBEE_BUSINESS_UNIT = 'ParliamentLive' + + _TESTS = [{ + 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', + 'info_dict': { + 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', + 'ext': 'mp4', + 'title': 'Home Affairs Committee', + 'timestamp': 1395153872, + 'upload_date': '20140318', + 'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail', + }, + }, { + 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', + 'only_matching': True, + }, { + 'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377', + 'info_dict': { + 'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377', + 'ext': 'mp4', + 'title': 'House of Commons', + 'timestamp': 1658392447, + 'upload_date': '20220721', + 'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + formats, subtitles = self._get_formats_and_subtitles(video_id) + + video_info = self._download_json( + f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False) + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': traverse_obj(video_info, ('event', 'title')), + 'thumbnail': traverse_obj(video_info, 'thumbnailUrl'), + 'timestamp': traverse_obj( + video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp), + '_format_sort_fields': ('res', 'proto'), + } + + +class RTBFIE(RedBeeBaseIE): + _WORKING = False + _VALID_URL = r'''(?x) + https?://(?:www\.)?rtbf\.be/ + (?: + video/[^?]+\?.*\bid=| + ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| + auvio/[^/]+\?.*\b(?P<live>l)?id= + )(?P<id>\d+)''' + _NETRC_MACHINE = 'rtbf' + + _REDBEE_CUSTOMER = 'RTBF' + _REDBEE_BUSINESS_UNIT = 'Auvio' + + _TESTS = [{ + 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', + 'md5': '8c876a1cceeb6cf31b476461ade72384', + 'info_dict': { + 'id': '1921274', + 'ext': 'mp4', + 'title': 'Les Diables au coeur (épisode 2)', + 'description': '(du 25/04/2014)', + 'duration': 3099.54, + 'upload_date': '20140425', + 'timestamp': 1398456300, + }, + 'skip': 'No longer available', + }, { + # geo restricted + 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442', + 'only_matching': True, + }, { + 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', + 'only_matching': True, + }, { + 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', + 'only_matching': True, + }, { + # Live + 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775', + 'only_matching': True, + }, { + # Audio + 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811', + 'only_matching': True, + }, { + # With Subtitle + 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588', + 'only_matching': True, + }, { + 'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926', + 'md5': 'd5d11bb62169fef38d7ce7ac531e034f', + 'info_dict': { + 'id': '2921926', + 'ext': 'mp4', + 'title': 'Le handicap un confinement perpétuel - Maladie de Lyme', + 'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52', + 'duration': 5258.8, + 'upload_date': '20220727', + 'timestamp': 1658934000, + 'series': '#Investigation', + 'thumbnail': 
r're:^https?://[^?&]+\.jpg$', + }, + }, { + 'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492', + 'md5': '054f9f143bc79c89647c35e5a7d35fa8', + 'info_dict': { + 'id': '2920492', + 'ext': 'mp4', + 'title': '04 - Le crime de la rue Royale', + 'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6', + 'duration': 1574.6, + 'upload_date': '20220723', + 'timestamp': 1658596887, + 'series': 'La Belgique criminelle - TV', + 'thumbnail': r're:^https?://[^?&]+\.jpg$', + }, + }] + + _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' + _PROVIDERS = { + 'YOUTUBE': 'Youtube', + 'DAILYMOTION': 'Dailymotion', + 'VIMEO': 'Vimeo', + } + _QUALITIES = [ + ('mobile', 'SD'), + ('web', 'MD'), + ('high', 'HD'), + ] + _LOGIN_URL = 'https://login.rtbf.be/accounts.login' + _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO' + _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}' + + def _perform_login(self, username, password): + if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID): + return + + self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600) + + login_response = self._download_json( + self._LOGIN_URL, None, data=urllib.parse.urlencode({ + 'loginID': username, + 'password': password, + 'APIKey': self._GIGYA_API_KEY, + 'targetEnv': 'jssdk', + 'sessionExpiration': '-2', + }).encode('utf-8'), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + + if login_response['statusCode'] != 200: + raise ExtractorError('Login failed. Server message: %s' % login_response['errorMessage'], expected=True) + + self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'], + secure=True, expire_time=time.time() + 3600) + + def _get_formats_and_subtitles(self, url, media_id): + login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID) + if not login_token: + self.raise_login_required() + + session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value) or self._download_json( + 'https://login.rtbf.be/accounts.getJWT', media_id, query={ + 'login_token': login_token.value, + 'APIKey': self._GIGYA_API_KEY, + 'sdk': 'js_latest', + 'authMode': 'cookie', + 'pageURL': url, + 'sdkBuild': '13273', + 'format': 'json', + })['id_token'] + + return super()._get_formats_and_subtitles(media_id, jwt=session_jwt) + + def _real_extract(self, url): + live, media_id = self._match_valid_url(url).groups() + embed_page = self._download_webpage( + 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), + media_id, query={'id': media_id}) + + media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False) + if not media_data: + if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page): + raise ExtractorError('Livestream has ended.', expected=True) + if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page): + self.raise_login_required() + + raise ExtractorError('Could not find media data') + + data = self._parse_json(media_data, media_id) + + error = data.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + provider = data.get('provider') + if provider in self._PROVIDERS: + return self.url_result(data['url'], self._PROVIDERS[provider]) + + title = traverse_obj(data, 'subtitle', 'title') + is_live = data.get('isLive') + height_re = r'-(\d+)p\.' 
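+        # height_re matches the '-720p.'-style suffix in progressive MP4 URLs.
+        # It is used below to rewrite a single HTTP URL into one variant per
+        # height advertised by the HLS formats, and to read the height back
+        # out of the legacy 'sources' URLs.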
+ formats, subtitles = [], {} + + # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake' + # since all they contain is a 20s video that is completely unrelated. + # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092 + m3u8_url = None if data.get('isLive') else traverse_obj(data, 'urlHlsAes128', 'urlHls') + if m3u8_url: + fmts, subs = self._extract_m3u8_formats_and_subtitles( + m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x + http_url = data.get('url') + if formats and http_url and re.search(height_re, http_url): + http_url = fix_url(http_url) + for m3u8_f in formats[:]: + height = m3u8_f.get('height') + if not height: + continue + f = m3u8_f.copy() + del f['protocol'] + f.update({ + 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'), + 'url': re.sub(height_re, '-%dp.' % height, http_url), + }) + formats.append(f) + else: + sources = data.get('sources') or {} + for key, format_id in self._QUALITIES: + format_url = sources.get(key) + if not format_url: + continue + height = int_or_none(self._search_regex( + height_re, format_url, 'height', default=None)) + formats.append({ + 'format_id': format_id, + 'url': fix_url(format_url), + 'height': height, + }) + + mpd_url = None if data.get('isLive') else data.get('urlDash') + if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')): + fmts, subs = self._extract_mpd_formats_and_subtitles( + mpd_url, media_id, mpd_id='dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + audio_url = data.get('urlAudio') + if audio_url: + formats.append({ + 'format_id': 'audio', + 'url': audio_url, + 'vcodec': 'none', + }) + + for track in (data.get('tracks') or {}).values(): + sub_url = track.get('url') + if not sub_url: + continue + subtitles.setdefault(track.get('lang') or 'fr', []).append({ + 'url': sub_url, + }) + + if not formats: + fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': media_id, + 'formats': formats, + 'title': title, + 'description': strip_or_none(data.get('description')), + 'thumbnail': data.get('thumbnail'), + 'duration': float_or_none(data.get('realDuration')), + 'timestamp': int_or_none(data.get('liveFrom')), + 'series': data.get('programLabel'), + 'subtitles': subtitles, + 'is_live': is_live, + '_format_sort_fields': ('res', 'proto'), + } diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py new file mode 100644 index 0000000..d1de249 --- /dev/null +++ b/yt_dlp/extractor/redbulltv.py @@ -0,0 +1,224 @@ +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + float_or_none, + ExtractorError, +) + + +class RedBullTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)' + _TESTS = [{ + # film + 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', + 'md5': 'fb0445b98aa4394e504b413d98031d1f', + 'info_dict': { + 'id': 'AP-1Q6XCDTAN1W11', + 'ext': 'mp4', + 'title': 'ABC of... WRC - ABC of... 
S1E6', + 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', + 'duration': 1582.04, + }, + }, { + # episode + 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11', + 'info_dict': { + 'id': 'AP-1PMHKJFCW1W11', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2E4', + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + 'duration': 904, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11', + 'only_matching': True, + }] + + def extract_info(self, video_id): + session = self._download_json( + 'https://api.redbull.tv/v3/session', video_id, + note='Downloading access token', query={ + 'category': 'personal_computer', + 'os_family': 'http', + }) + if session.get('code') == 'error': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, session['message'])) + token = session['token'] + + try: + video = self._download_json( + 'https://api.redbull.tv/v3/products/' + video_id, + video_id, note='Downloading video information', + headers={'Authorization': token} + ) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 404: + error_message = self._parse_json( + e.cause.response.read().decode(), video_id)['error'] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error_message), expected=True) + raise + + title = video['title'].strip() + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token), + video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + + for resource in video.get('resources', []): + if resource.startswith('closed_caption_'): + splitted_resource = resource.split('_') + if splitted_resource[2]: + subtitles.setdefault('en', []).append({ + 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource), + 'ext': splitted_resource[2], + }) + + subheading = video.get('subheading') + if subheading: + title += ' - %s' % subheading + + return { + 'id': video_id, + 'title': title, + 'description': video.get('long_description') or video.get( + 'short_description'), + 'duration': float_or_none(video.get('duration'), scale=1000), + 'formats': formats, + 'subtitles': subtitles, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.extract_info(video_id) + + +class RedBullEmbedIE(RedBullTVIE): # XXX: Do not subclass from concrete IE + _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})' + _TESTS = [{ + # HLS manifest accessible only using assetId + 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT', + 'only_matching': True, + }] + _VIDEO_ESSENSE_TMPL = '''... 
on %s { + videoEssence { + attributes + } + }''' + + def _real_extract(self, url): + rrn_id = self._match_id(url) + asset_id = self._download_json( + 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', + rrn_id, headers={ + 'Accept': 'application/json', + 'API-KEY': 'e90a1ff11335423998b100c929ecc866', + }, query={ + 'query': '''{ + resource(id: "%s", enforceGeoBlocking: false) { + %s + %s + } +}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), + })['data']['resource']['videoEssence']['attributes']['assetId'] + return self.extract_info(asset_id) + + +class RedBullTVRrnContentIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TESTS = [{ + 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me', + 'only_matching': True, + }] + + def _real_extract(self, url): + region, lang, rrn_id = self._match_valid_url(url).groups() + rrn_id += ':%s-%s' % (lang, region.upper()) + return self.url_result( + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) + + +class RedBullIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04', + 'md5': 'db8271a7200d40053a1809ed0dd574ff', + 'info_dict': { + 'id': 'AA-1MT8DQWA91W14', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2E4', + 'description': 'md5:5546aa612958c08a98faaad4abce484d', + }, + }, { + 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', + 'only_matching': True, + }, { + # only available on the int-en website so a fallback is need for the API + # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero + 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia', + 'only_matching': True, + }] + _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr'] + _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe'] + + def _real_extract(self, url): + region, lang, filter_type, display_id = self._match_valid_url(url).groups() + if filter_type == 'episodes': + filter_type = 'episode-videos' + elif filter_type == 'live': + filter_type = 'live-videos' + + regions = [region.upper()] + if region != 'int': + if region in 
self._LAT_FALLBACK_MAP: + regions.append('LAT') + if lang in self._INT_FALLBACK_LIST: + regions.append('INT') + locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions]) + + rrn_id = self._download_json( + 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, + display_id, query={ + 'filter[type]': filter_type, + 'filter[uriSlug]': display_id, + 'rb3Schema': 'v1:hero', + })['data']['id'] + + return self.url_result( + 'https://www.redbull.com/embed/' + rrn_id, + RedBullEmbedIE.ie_key(), rrn_id) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py new file mode 100644 index 0000000..62f669f --- /dev/null +++ b/yt_dlp/extractor/reddit.py @@ -0,0 +1,353 @@ +import urllib.parse + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + traverse_obj, + try_get, + unescapeHTML, + urlencode_postdata, + url_or_none, +) + + +class RedditIE(InfoExtractor): + _NETRC_MACHINE = 'reddit' + _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'display_id': '6rrwyj', + 'title': 'That small heart attack.', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:4', + 'timestamp': 1501941939, + 'upload_date': '20170805', + 'uploader': 'Antw87', + 'duration': 12, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + 'channel_id': 'videos', + }, + 'params': { + 'skip_download': True, + }, + }, { + # 1080p fallback format + 'url': 'https://www.reddit.com/r/aww/comments/90bu6w/heat_index_was_110_degrees_so_we_offered_him_a/', + 'md5': '8b5902cfda3006bf90faea7adf765a49', + 'info_dict': { + 'id': 'gyh95hiqc0b11', + 'ext': 'mp4', + 'display_id': '90bu6w', + 'title': 'Heat index was 110 degrees so we offered him a cold drink. He went for a full body soak instead', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:7', + 'timestamp': 1532051078, + 'upload_date': '20180720', + 'uploader': 'FootLoosePickleJuice', + 'duration': 14, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + 'channel_id': 'aww', + }, + }, { + # User post + 'url': 'https://www.reddit.com/user/creepyt0es/comments/nip71r/i_plan_to_make_more_stickers_and_prints_check/', + 'info_dict': { + 'id': 'zasobba6wp071', + 'ext': 'mp4', + 'display_id': 'nip71r', + 'title': 'I plan to make more stickers and prints! Check them out on my Etsy! Or get them through my Patreon. 
Links below.', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:5', + 'timestamp': 1621709093, + 'upload_date': '20210522', + 'uploader': 'creepyt0es', + 'duration': 6, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + 'channel_id': 'u_creepyt0es', + }, + 'params': { + 'skip_download': True, + }, + }, { + # videos embedded in reddit text post + 'url': 'https://www.reddit.com/r/KamenRider/comments/wzqkxp/finale_kamen_rider_revice_episode_50_family_to/', + 'playlist_count': 2, + 'info_dict': { + 'id': 'wzqkxp', + 'title': 'md5:72d3d19402aa11eff5bd32fc96369b37', + }, + }, { + # crossposted reddit-hosted media + 'url': 'https://www.reddit.com/r/dumbfuckers_club/comments/zjjw82/cringe/', + 'md5': '746180895c7b75a9d6b05341f507699a', + 'info_dict': { + 'id': 'a1oneun6pa5a1', + 'ext': 'mp4', + 'display_id': 'zjjw82', + 'title': 'Cringe', + 'uploader': 'Otaku-senpai69420', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'upload_date': '20221212', + 'timestamp': 1670812309, + 'duration': 16, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + 'channel_id': 'dumbfuckers_club', + }, + }, { + # post link without subreddit + 'url': 'https://www.reddit.com/comments/124pp33', + 'md5': '15eec9d828adcef4468b741a7e45a395', + 'info_dict': { + 'id': 'antsenjc2jqa1', + 'ext': 'mp4', + 'display_id': '124pp33', + 'title': 'Harmless prank of some old friends', + 'uploader': 'Dudezila', + 'channel_id': 'ContagiousLaughter', + 'duration': 17, + 'upload_date': '20230328', + 'timestamp': 1680012043, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + }, + }, { + # quarantined subreddit post + 'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/', + 'md5': '3156ea69e3c1f1b6259683c5abd36e71', + 'info_dict': { + 'id': '8bwtclfggpsa1', + 'ext': 'mp4', + 'display_id': '12fujy3', + 'title': 'Based Hasan?', + 'uploader': 'KingNigelXLII', + 'channel_id': 'GenZedong', + 'duration': 16, + 'upload_date': '20230408', + 'timestamp': 1680979138, + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + }, + 'skip': 'Requires account that has opted-in to the GenZedong subreddit', + }, { + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', + 'only_matching': True, + }, { + # imgur + 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # imgur @ old reddit + 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # streamable + 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', + 'only_matching': True, + }, { + # youtube + 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', + 'only_matching': True, + }, { + # reddit video @ nm reddit + 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/', + 'only_matching': True, + }, { + 'url': 'https://www.redditmedia.com/r/serbia/comments/pu9wbx/ako_vu%C4%8Di%C4%87_izgubi_izbore_ja_%C4%87u_da_crknem/', + 'only_matching': True, + }] + + def _perform_login(self, username, password): + captcha = self._download_json( + 'https://www.reddit.com/api/requires_captcha/login.json', None, + 'Checking login requirement')['required'] + if captcha: + raise ExtractorError('Reddit is requiring captcha before 
login', expected=True) + login = self._download_json( + f'https://www.reddit.com/api/login/{username}', None, data=urlencode_postdata({ + 'op': 'login-main', + 'user': username, + 'passwd': password, + 'api_type': 'json', + }), note='Logging in', errnote='Login request failed') + errors = '; '.join(traverse_obj(login, ('json', 'errors', ..., 1))) + if errors: + raise ExtractorError(f'Unable to login, Reddit API says {errors}', expected=True) + elif not traverse_obj(login, ('json', 'data', 'cookie', {str})): + raise ExtractorError('Unable to login, no cookie was returned') + + def _real_extract(self, url): + host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id') + + data = self._download_json( + f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403) + if not data: + fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com' + self.to_screen(f'{host} request failed, retrying with {fallback_host}') + data = self._download_json( + f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403) + + if traverse_obj(data, 'error') == 403: + reason = data.get('reason') + if reason == 'quarantined': + self.raise_login_required('Quarantined subreddit; an account that has opted in is required') + elif reason == 'private': + self.raise_login_required('Private subreddit; an account that has been approved is required') + else: + raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}') + + data = data[0]['data']['children'][0]['data'] + video_url = data['url'] + + over_18 = data.get('over_18') + if over_18 is True: + age_limit = 18 + elif over_18 is False: + age_limit = 0 + else: + age_limit = None + + thumbnails = [] + + def add_thumbnail(src): + if not isinstance(src, dict): + return + thumbnail_url = url_or_none(src.get('url')) + if not thumbnail_url: + return + thumbnails.append({ + 'url': unescapeHTML(thumbnail_url), + 'width': int_or_none(src.get('width')), + 'height': int_or_none(src.get('height')), + 'http_headers': {'Accept': '*/*'}, + }) + + for image in try_get(data, lambda x: x['preview']['images']) or []: + if not isinstance(image, dict): + continue + add_thumbnail(image.get('source')) + resolutions = image.get('resolutions') + if isinstance(resolutions, list): + for resolution in resolutions: + add_thumbnail(resolution) + + info = { + 'title': data.get('title'), + 'thumbnails': thumbnails, + 'timestamp': float_or_none(data.get('created_utc')), + 'uploader': data.get('author'), + 'channel_id': data.get('subreddit'), + 'like_count': int_or_none(data.get('ups')), + 'dislike_count': int_or_none(data.get('downs')), + 'comment_count': int_or_none(data.get('num_comments')), + 'age_limit': age_limit, + } + + parsed_url = urllib.parse.urlparse(video_url) + + # Check for embeds in text posts, or else raise to avoid recursing into the same reddit URL + if 'reddit.com' in parsed_url.netloc and f'/{video_id}/' in parsed_url.path: + entries = [] + for media in traverse_obj(data, ('media_metadata', ...), expected_type=dict): + if not media.get('id') or media.get('e') != 'RedditVideo': + continue + formats = [] + if media.get('hlsUrl'): + formats.extend(self._extract_m3u8_formats( + unescapeHTML(media['hlsUrl']), video_id, 'mp4', m3u8_id='hls', fatal=False)) + if media.get('dashUrl'): + formats.extend(self._extract_mpd_formats( + unescapeHTML(media['dashUrl']), video_id, mpd_id='dash', fatal=False)) + if formats: + entries.append({ + 'id': media['id'], + 'display_id': video_id, + 'formats': formats, + **info, + }) 
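+            # A single text post can embed several reddit-hosted videos, so
+            # they are returned as a playlist; if none were playable, stop
+            # here instead of recursing into the same reddit URL.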
+ if entries: + return self.playlist_result(entries, video_id, info.get('title')) + raise ExtractorError('No media found', expected=True) + + # Check if media is hosted on reddit: + reddit_video = traverse_obj(data, ( + (None, ('crosspost_parent_list', ...)), ('secure_media', 'media'), 'reddit_video'), get_all=False) + if reddit_video: + playlist_urls = [ + try_get(reddit_video, lambda x: unescapeHTML(x[y])) + for y in ('dash_url', 'hls_url') + ] + + # Update video_id + display_id = video_id + video_id = self._search_regex( + r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], + 'video_id', default=display_id) + + dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' + hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + + formats = [{ + 'url': unescapeHTML(reddit_video['fallback_url']), + 'height': int_or_none(reddit_video.get('height')), + 'width': int_or_none(reddit_video.get('width')), + 'tbr': int_or_none(reddit_video.get('bitrate_kbps')), + 'acodec': 'none', + 'vcodec': 'h264', + 'ext': 'mp4', + 'format_id': 'fallback', + 'format_note': 'DASH video, mp4_dash', + }] + hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles( + hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(hls_fmts) + dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles( + dash_playlist_url, display_id, mpd_id='dash', fatal=False) + formats.extend(dash_fmts) + self._merge_subtitles(dash_subs, target=subtitles) + + return { + **info, + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + 'duration': int_or_none(reddit_video.get('duration')), + } + + if parsed_url.netloc == 'v.redd.it': + self.raise_no_formats('This video is processing', expected=True, video_id=video_id) + return { + **info, + 'id': parsed_url.path.split('/')[1], + 'display_id': video_id, + } + + # Not hosted on reddit, must continue extraction + return { + **info, + 'display_id': video_id, + '_type': 'url_transparent', + 'url': video_url, + } diff --git a/yt_dlp/extractor/redge.py b/yt_dlp/extractor/redge.py new file mode 100644 index 0000000..875d6f8 --- /dev/null +++ b/yt_dlp/extractor/redge.py @@ -0,0 +1,135 @@ +import functools + +from .common import InfoExtractor +from ..networking import HEADRequest +from ..utils import ( + float_or_none, + int_or_none, + join_nonempty, + parse_qs, + update_url_query, +) +from ..utils.traversal import traverse_obj + + +class RedCDNLivxIE(InfoExtractor): + _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx' + IE_NAME = 'redcdnlivx' + + _TESTS = [{ + 'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000', + 'info_dict': { + 'id': 'ENC02-638272860000-638292544000', + 'ext': 'mp4', + 'title': 'ENC02', + 'duration': 19683.982, + 'live_status': 'was_live', + }, + }, { + 'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000', + 'info_dict': { + 'id': 'ENC18-722333096000-722335562000', + 'ext': 'mp4', + 'title': 'ENC18', + 'duration': 2463.995, + 'live_status': 'was_live', + }, + }, { + 'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000', + 'info_dict': { + 'id': 'triathlon2018-warsaw-550305000000-550327620000', + 'ext': 
'mp4', + 'title': 'triathlon2018/warsaw', + 'duration': 22619.98, + 'live_status': 'was_live', + }, + }, { + 'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000', + 'only_matching': True, + }, { + 'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000', + 'only_matching': True, + }] + + """ + Known methods (first in url path): + - `livedash` - DASH MPD + - `livehls` - HTTP Live Streaming + - `livess` - IIS Smooth Streaming + - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac + - `sc` - shoutcast/icecast (audio streams, like radio) + """ + + def _real_extract(self, url): + tenant, path = self._match_valid_url(url).group('tenant', 'id') + qs = parse_qs(url) + start_time = traverse_obj(qs, ('startTime', 0, {int_or_none})) + stop_time = traverse_obj(qs, ('stopTime', 0, {int_or_none})) + + def livx_mode(mode): + suffix = '' + if mode == 'livess': + suffix = '/manifest' + elif mode == 'livehls': + suffix = '/playlist.m3u8' + file_qs = {} + if start_time: + file_qs['startTime'] = start_time + if stop_time: + file_qs['stopTime'] = stop_time + if mode == 'nvr': + file_qs['nolimit'] = 1 + elif mode != 'sc': + file_qs['indexMode'] = 'true' + return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs) + + # no id or title for a transmission. making ones up. + title = path \ + .replace('/live', '').replace('live/', '') \ + .replace('/channel', '').replace('channel/', '') \ + .strip('/') + video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time) + + formats = [] + # downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata + ism_res = self._download_xml_handle( + livx_mode('livess'), video_id, + note='Downloading ISM manifest', + errnote='Failed to download ISM manifest', + fatal=False) + ism_doc = None + if ism_res is not False: + ism_doc, ism_urlh = ism_res + formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss') + + nvr_urlh = self._request_webpage( + HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False, + expected_status=lambda _: True) + if nvr_urlh and nvr_urlh.status == 200: + formats.append({ + 'url': nvr_urlh.url, + 'ext': 'flv', + 'format_id': 'direct-0', + 'preference': -1, # might be slow + }) + formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_m3u8_formats( + livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)) + + time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000 + duration = traverse_obj( + ism_doc, ('@Duration', {functools.partial(float_or_none, scale=time_scale)})) or None + + live_status = None + if traverse_obj(ism_doc, '@IsLive') == 'TRUE': + live_status = 'is_live' + elif duration: + live_status = 'was_live' + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'duration': duration, + 'live_status': live_status, + } diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py new file mode 100644 index 0000000..f945320 --- /dev/null +++ b/yt_dlp/extractor/redgifs.py @@ -0,0 +1,260 @@ +import functools + +from .common import InfoExtractor +from ..compat import compat_parse_qs +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + try_get, + OnDemandPagedList, +) + + +class 
RedGifsBaseInfoExtractor(InfoExtractor):
+    _FORMATS = {
+        'gif': 250,
+        'sd': 480,
+        'hd': None,
+    }
+
+    _API_HEADERS = {
+        'referer': 'https://www.redgifs.com/',
+        'origin': 'https://www.redgifs.com',
+        'content-type': 'application/json',
+    }
+
+    def _parse_gif_data(self, gif_data):
+        video_id = gif_data.get('id')
+        quality = qualities(tuple(self._FORMATS.keys()))
+
+        orig_height = int_or_none(gif_data.get('height'))
+        # width/height ratio of the original media, used to scale each rendition's width
+        aspect_ratio = try_get(gif_data, lambda x: x['width'] / orig_height)
+
+        formats = []
+        for format_id, height in self._FORMATS.items():
+            video_url = gif_data['urls'].get(format_id)
+            if not video_url:
+                continue
+            height = min(orig_height, height or orig_height)
+            formats.append({
+                'url': video_url,
+                'format_id': format_id,
+                'width': height * aspect_ratio if aspect_ratio else None,
+                'height': height,
+                'quality': quality(format_id),
+            })
+
+        return {
+            'id': video_id,
+            'webpage_url': f'https://redgifs.com/watch/{video_id}',
+            'extractor_key': RedGifsIE.ie_key(),
+            'extractor': 'RedGifs',
+            'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
+            'timestamp': int_or_none(gif_data.get('createDate')),
+            'uploader': gif_data.get('userName'),
+            'duration': int_or_none(gif_data.get('duration')),
+            'view_count': int_or_none(gif_data.get('views')),
+            'like_count': int_or_none(gif_data.get('likes')),
+            'categories': gif_data.get('tags') or [],
+            'tags': gif_data.get('tags'),
+            'age_limit': 18,
+            'formats': formats,
+        }
+
+    def _fetch_oauth_token(self, video_id):
+        # https://github.com/Redgifs/api/wiki/Temporary-tokens
+        auth = self._download_json('https://api.redgifs.com/v2/auth/temporary',
+                                   video_id, note='Fetching temporary token')
+        if not auth.get('token'):
+            raise ExtractorError('Unable to get temporary token')
+        self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'
+
+    def _call_api(self, ep, video_id, *args, **kwargs):
+        for first_attempt in True, False:
+            if 'authorization' not in self._API_HEADERS:
+                self._fetch_oauth_token(video_id)
+            try:
+                headers = dict(self._API_HEADERS)
+                headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
+                data = self._download_json(
+                    f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs)
+                break
+            except ExtractorError as e:
+                if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401:
+                    del self._API_HEADERS['authorization']  # refresh the token
+                    continue
+                raise
+
+        if 'error' in data:
+            raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
+        return data
+
+    def _fetch_page(self, ep, video_id, query, page):
+        query['page'] = page + 1
+        data = self._call_api(
+            ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
+
+        for entry in data['gifs']:
+            yield self._parse_gif_data(entry)
+
+    def _prepare_api_query(self, query, fields):
+        api_query = [
+            (field_name, query.get(field_name, (default,))[0])
+            for field_name, default in fields.items()]
+
+        return {key: val for key, val in api_query if val is not None}
+
+    def _paged_entries(self, ep, item_id, query, fields):
+        page = int_or_none(query.get('page', (None,))[0])
+        page_fetcher = functools.partial(
+            self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
+        return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
+
+
+class RedGifsIE(RedGifsBaseInfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
+    _TESTS = [{
+        'url': 
'https://www.redgifs.com/watch/squeakyhelplesswisent', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + 'tags': list, + } + }, { + 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + 'tags': list, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url).lower() + video_info = self._call_api( + f'gifs/{video_id}?views=yes', video_id, note='Downloading video info') + return self._parse_gif_data(video_info['gif']) + + +class RedGifsSearchIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs search' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)' + _PAGE_SIZE = 80 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/browse?tags=Lesbian', + 'info_dict': { + 'id': 'tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by trending' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian&page=2', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_count': 80, + } + ] + + def _real_extract(self, url): + query_str = self._match_valid_url(url).group('query') + query = compat_parse_qs(query_str) + if not query.get('tags'): + raise ExtractorError('Invalid query tags', expected=True) + + tags = query.get('tags')[0] + order = query.get('order', ('trending',))[0] + + query['search_text'] = [tags] + entries = self._paged_entries('gifs/search', query_str, query, { + 'search_text': None, + 'order': 'trending', + 'type': None, + }) + + return self.playlist_result( + entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}') + + +class RedGifsUserIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs user' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' 
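+    # A sketch of the API call backing this playlist extractor, assuming the
+    # defaults below (endpoint and parameter names come from _paged_entries and
+    # _call_api above; the username is just an example taken from the tests):
+    #   GET https://api.redgifs.com/v2/users/lamsinka89/search?order=recent&page=1
+    # Pages are fetched lazily via OnDemandPagedList, _PAGE_SIZE items at a time,
+    # unless the URL pins a specific `page`, in which case only that page is listed.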
+ _PAGE_SIZE = 30 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/users/lamsinka89', + 'info_dict': { + 'id': 'lamsinka89', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', + 'info_dict': { + 'id': 'lamsinka89?page=3', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_count': 30, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', + 'info_dict': { + 'id': 'lamsinka89?order=best&type=g', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by best' + }, + 'playlist_mincount': 100, + } + ] + + def _real_extract(self, url): + username, query_str = self._match_valid_url(url).group('username', 'query') + playlist_id = f'{username}?{query_str}' if query_str else username + + query = compat_parse_qs(query_str) + order = query.get('order', ('recent',))[0] + + entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { + 'order': 'recent', + 'type': None, + }) + + return self.playlist_result( + entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}') diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py new file mode 100644 index 0000000..965abbe --- /dev/null +++ b/yt_dlp/extractor/redtube.py @@ -0,0 +1,144 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + merge_dicts, + str_to_int, + unified_strdate, + url_or_none, + urljoin, +) + + +class RedTubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' + _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)'] + _TESTS = [{ + 'url': 'https://www.redtube.com/38864951', + 'md5': '4fba70cbca3aefd25767ab4b523c9878', + 'info_dict': { + 'id': '38864951', + 'ext': 'mp4', + 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu', + 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. 
Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu', + 'upload_date': '20210111', + 'timestamp': 1610343109, + 'duration': 646, + 'view_count': int, + 'age_limit': 18, + 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg', + }, + }, { + 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', + 'only_matching': True, + }, { + 'url': 'http://it.redtube.com/66418', + 'only_matching': True, + }, { + 'url': 'https://www.redtube.com.br/103224331', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + f'https://www.redtube.com/{video_id}', video_id) + + ERRORS = ( + (('video-deleted-info', '>This video has been removed'), 'has been removed'), + (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'), + ) + + for patterns, message in ERRORS: + if any(p in webpage for p in patterns): + raise ExtractorError( + 'Video %s %s' % (video_id, message), expected=True) + + info = self._search_json_ld(webpage, video_id, default={}) + + if not info.get('title'): + info['title'] = self._html_search_regex( + (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', + r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) + + formats = [] + sources = self._parse_json( + self._search_regex( + r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), + video_id, fatal=False) + if sources and isinstance(sources, dict): + for format_id, format_url in sources.items(): + if format_url: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + medias = self._parse_json( + self._search_regex( + r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage, + 'media definitions', default='{}'), + video_id, fatal=False) + for media in medias if isinstance(medias, list) else []: + format_url = urljoin('https://www.redtube.com', media.get('videoUrl')) + if not format_url: + continue + format_id = media.get('format') + quality = media.get('quality') + if format_id == 'hls' or (format_id == 'mp4' and not quality): + more_media = self._download_json(format_url, video_id, fatal=False) + else: + more_media = [media] + for media in more_media if isinstance(more_media, list) else []: + format_url = url_or_none(media.get('videoUrl')) + if not format_url: + continue + format_id = media.get('format') + if format_id == 'hls' or determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id or 'hls', + fatal=False)) + continue + format_id = media.get('quality') + formats.append({ + 'url': format_url, + 'ext': 'mp4', + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + if not formats: + video_url = self._html_search_regex( + r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') + formats.append({'url': video_url, 'ext': 'mp4'}) + + thumbnail = self._og_search_thumbnail(webpage) + upload_date = unified_strdate(self._search_regex( + r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', + webpage, 'upload date', default=None)) + duration = int_or_none(self._og_search_property( + 'video:duration', webpage, default=None) or self._search_regex( + r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) + view_count = 
str_to_int(self._search_regex( + (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', + r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', + r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), + webpage, 'view count', default=None)) + + # No self-labeling, but they describe themselves as + # "Home of Videos Porno" + age_limit = 18 + + return merge_dicts(info, { + 'id': video_id, + 'ext': 'mp4', + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'age_limit': age_limit, + 'formats': formats, + }) diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py new file mode 100644 index 0000000..abb537c --- /dev/null +++ b/yt_dlp/extractor/rentv.py @@ -0,0 +1,104 @@ +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + url_or_none, +) + + +class RENTVIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://ren.tv/video/epizod/118577', + 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', + 'info_dict': { + 'id': '118577', + 'ext': 'mp4', + 'title': 'Документальный спецпроект: "Промывка мозгов. Технологии XXI века"', + 'timestamp': 1472230800, + 'upload_date': '20160826', + } + }, { + 'url': 'http://ren.tv/player/118577', + 'only_matching': True, + }, { + 'url': 'rentv:118577', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) + config = self._parse_json(self._search_regex( + r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) + title = config['title'] + formats = [] + for video in config['src']: + src = url_or_none(video.get('src')) + if not src: + continue + ext = determine_ext(src) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) + return { + 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'duration': int_or_none(config.get('duration')), + 'timestamp': int_or_none(config.get('date')), + 'formats': formats, + } + + +class RENTVArticleIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v', + 'md5': 'ebd63c4680b167693745ab91343df1d6', + 'info_dict': { + 'id': '136472', + 'ext': 'mp4', + 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла', + 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.', + } + }, { + # TODO: invalid m3u8 + 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', + 'info_dict': { + 'id': 'playlist', + 'ext': 'mp4', + 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. 
ВИДЕО | РЕН ТВ', + 'uploader': 'ren.tv', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + 'skip': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + + entries = [] + for config_profile in drupal_settings.get('ren_jwplayer', {}).values(): + media_id = config_profile.get('mediaid') + if not media_id: + continue + media_id = compat_str(media_id) + entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id)) + return self.playlist_result(entries, display_id) diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py new file mode 100644 index 0000000..f49262a --- /dev/null +++ b/yt_dlp/extractor/restudy.py @@ -0,0 +1,41 @@ +from .common import InfoExtractor + + +class RestudyIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.restudy.dk/video/play/id/1637', + 'info_dict': { + 'id': '1637', + 'ext': 'flv', + 'title': 'Leiden-frosteffekt', + 'description': 'Denne video er et eksperiment med flydende kvælstof.', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage).strip() + description = self._og_search_description(webpage).strip() + + formats = self._extract_smil_formats( + 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id, + video_id) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + } diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py new file mode 100644 index 0000000..0a8f13b --- /dev/null +++ b/yt_dlp/extractor/reuters.py @@ -0,0 +1,66 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + int_or_none, + unescapeHTML, +) + + +class ReutersIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', + 'md5': '8015113643a0b12838f160b0b81cc2ee', + 'info_dict': { + 'id': '368575562', + 'ext': 'mp4', + 'title': 'San Francisco police chief resigns', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) + video_data = js_to_json(self._search_regex( + r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', + webpage, 'video data')) + + def get_json_value(key, fatal=False): + return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) + + title = unescapeHTML(get_json_value('title', fatal=True)) + mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() + + mas_data = self._download_json( + 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), + video_id, transform_source=js_to_json) + formats = [] + for f in mas_data: + f_url = f.get('url') + if not f_url: + continue + method = f.get('method') + if method == 
'hls': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + container = f.get('container') + ext = '3gp' if method == 'mobile' else container + formats.append({ + 'format_id': ext, + 'url': f_url, + 'ext': ext, + 'container': container if method != 'mobile' else None, + }) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': get_json_value('thumb'), + 'duration': int_or_none(get_json_value('seconds')), + 'formats': formats, + } diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py new file mode 100644 index 0000000..06b6c3c --- /dev/null +++ b/yt_dlp/extractor/reverbnation.py @@ -0,0 +1,51 @@ +from .common import InfoExtractor +from ..utils import ( + qualities, + str_or_none, +) + + +class ReverbNationIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' + _TESTS = [{ + 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', + 'info_dict': { + 'id': '16965047', + 'ext': 'mp3', + 'title': 'MONA LISA', + 'uploader': 'ALKILADOS', + 'uploader_id': '216429', + 'thumbnail': r're:^https?://.*\.jpg', + }, + }] + + def _real_extract(self, url): + song_id = self._match_id(url) + + api_res = self._download_json( + 'https://api.reverbnation.com/song/%s' % song_id, + song_id, + note='Downloading information of song %s' % song_id + ) + + THUMBNAILS = ('thumbnail', 'image') + quality = qualities(THUMBNAILS) + thumbnails = [] + for thumb_key in THUMBNAILS: + if api_res.get(thumb_key): + thumbnails.append({ + 'url': api_res[thumb_key], + 'preference': quality(thumb_key) + }) + + return { + 'id': song_id, + 'title': api_res['name'], + 'url': api_res['url'], + 'uploader': api_res.get('artist', {}).get('name'), + 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), + 'thumbnails': thumbnails, + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/yt_dlp/extractor/rheinmaintv.py b/yt_dlp/extractor/rheinmaintv.py new file mode 100644 index 0000000..c3b352d --- /dev/null +++ b/yt_dlp/extractor/rheinmaintv.py @@ -0,0 +1,94 @@ +from .common import InfoExtractor +from ..utils import extract_attributes, merge_dicts, remove_end + + +class RheinMainTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)' + _TESTS = [{ + 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/', + 'info_dict': { + 'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022', + 'ext': 'ismv', # ismv+isma will be merged into mp4 + 'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft', + 'title': 'Auf dem Weg zur Deutschen Meisterschaft', + 'upload_date': '20221108', + 'view_count': int, + 'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft', + 'thumbnail': r're:^https://.+\.jpg', + 'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9', + 'timestamp': 1667933057, + 'duration': 243.0, + }, + 'params': {'skip_download': 'ism'}, + }, { + 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/', + 'info_dict': { + 'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022', + 'ext': 'ismv', + 'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften', + 'timestamp': 1668526214, + 'display_id': 
'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften', + 'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften', + 'view_count': int, + 'thumbnail': r're:^https://.+\.jpg', + 'duration': 345.0, + 'description': 'md5:9370ba29526984006c2cba1372e5c5a0', + 'upload_date': '20221115', + }, + 'params': {'skip_download': 'ism'}, + }, { + 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/', + 'info_dict': { + 'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022', + 'ext': 'ismv', + 'title': 'Casino Mainz bei den Deutschen Meisterschaften', + 'view_count': int, + 'timestamp': 1668527402, + 'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften', + 'upload_date': '20221115', + 'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften', + 'duration': 348.0, + 'thumbnail': r're:^https://.+\.jpg', + 'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa', + }, + 'params': {'skip_download': 'ism'}, + }, { + 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + display_id = mobj.group('display_id') + video_id = mobj.group('video_id').replace('/', '-') + webpage = self._download_webpage(url, video_id) + + source, img = self._search_regex(r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)', + webpage, 'video', group=('source', 'img')) + source = extract_attributes(source) + img = extract_attributes(img) + + raw_json_ld = list(self._yield_json_ld(webpage, video_id)) + json_ld = self._json_ld(raw_json_ld, video_id) + json_ld.pop('url', None) + + ism_manifest_url = ( + source.get('src') + or next(json_ld.get('embedUrl') for json_ld in raw_json_ld if json_ld.get('@type') == 'VideoObject') + ) + formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id) + + return merge_dicts({ + 'id': video_id, + 'display_id': display_id, + 'title': + self._html_search_regex(r'<h1><span class="title">([^<]*)</span>', + webpage, 'headline', default=None) + or img.get('title') or json_ld.get('title') or self._og_search_title(webpage) + or remove_end(self._html_extract_title(webpage), ' -'), + 'alt_title': img.get('alt'), + 'description': json_ld.get('description') or self._og_search_description(webpage), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': [{'url': img['src']}] if 'src' in img else json_ld.get('thumbnails'), + }, json_ld) diff --git a/yt_dlp/extractor/ridehome.py b/yt_dlp/extractor/ridehome.py new file mode 100644 index 0000000..78f838a --- /dev/null +++ b/yt_dlp/extractor/ridehome.py @@ -0,0 +1,96 @@ +from .art19 import Art19IE +from .common import InfoExtractor +from ..utils import extract_attributes, get_elements_html_by_class +from ..utils.traversal import traverse_obj + + +class RideHomeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ridehome\.info/show/[\w-]+/(?P<id>[\w-]+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.ridehome.info/show/techmeme-ride-home/thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs/', + 'info_dict': { + 'id': 'thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs', + }, + 'playlist_count': 1, + 'playlist': [{ + 'md5': 'c84ea3cc96950a9ab86fe540f3edc588', + 'info_dict': { + 'id': '540e5493-9fe6-4c14-a488-dc508d8794b2', + 'ext': 'mp3', + 'title': 'Thu. 
12/28 – Will 2024 Be The Year Apple Gets Serious About Gaming On Macs?', + 'description': 'md5:9dba86ae9b5047a8150eceddeeb629c2', + 'series': 'Techmeme Ride Home', + 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b', + 'upload_date': '20231228', + 'timestamp': 1703780995, + 'modified_date': '20231230', + 'episode_id': '540e5493-9fe6-4c14-a488-dc508d8794b2', + 'modified_timestamp': 1703912404, + 'release_date': '20231228', + 'release_timestamp': 1703782800, + 'duration': 1000.1502, + 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$', + }, + }], + }, { + 'url': 'https://www.ridehome.info/show/techmeme-ride-home/portfolio-profile-sensel-with-ilyarosenberg/', + 'info_dict': { + 'id': 'portfolio-profile-sensel-with-ilyarosenberg', + }, + 'playlist_count': 1, + 'playlist': [{ + 'md5': 'bf9d6efad221008ce71aea09d5533cf6', + 'info_dict': { + 'id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac', + 'ext': 'mp3', + 'title': '(Portfolio Profile) Sensel - With @IlyaRosenberg', + 'description': 'md5:e1e4a970bce04290e0ba6f030b0125db', + 'series': 'Techmeme Ride Home', + 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b', + 'upload_date': '20220108', + 'timestamp': 1641656064, + 'modified_date': '20230418', + 'episode_id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac', + 'modified_timestamp': 1681843318, + 'release_date': '20220108', + 'release_timestamp': 1641672000, + 'duration': 2789.38122, + 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$' + }, + }], + }, { + 'url': 'https://www.ridehome.info/show/spacecasts/big-tech-news-apples-macbook-pro-event/', + 'info_dict': { + 'id': 'big-tech-news-apples-macbook-pro-event', + }, + 'playlist_count': 1, + 'playlist': [{ + 'md5': 'b1428530c6e03904a8271e978007fc05', + 'info_dict': { + 'id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7', + 'ext': 'mp3', + 'title': 'md5:e6c05d44d59b6577a4145ac339de5040', + 'description': 'md5:14152f7228c8a301a77e3d6bc891b145', + 'series': 'SpaceCasts', + 'series_id': '8e3e837d-7fe0-4a23-8e11-894917e07e17', + 'upload_date': '20211026', + 'timestamp': 1635271450, + 'modified_date': '20230502', + 'episode_id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7', + 'modified_timestamp': 1683057500, + 'release_date': '20211026', + 'release_timestamp': 1635272124, + 'duration': 2266.30531, + 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$' + }, + }], + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + webpage = self._download_webpage(url, article_id) + + urls = traverse_obj( + get_elements_html_by_class('iframeContainer', webpage), + (..., {extract_attributes}, lambda k, v: k == 'data-src' and Art19IE.suitable(v))) + return self.playlist_from_matches(urls, article_id, ie=Art19IE) diff --git a/yt_dlp/extractor/rinsefm.py b/yt_dlp/extractor/rinsefm.py new file mode 100644 index 0000000..f87b895 --- /dev/null +++ b/yt_dlp/extractor/rinsefm.py @@ -0,0 +1,89 @@ +from .common import InfoExtractor +from ..utils import ( + MEDIA_EXTENSIONS, + determine_ext, + parse_iso8601, + traverse_obj, + url_or_none, +) + + +class RinseFMBaseIE(InfoExtractor): + @staticmethod + def _parse_entry(entry): + return { + **traverse_obj(entry, { + 'id': ('id', {str}), + 'title': ('title', {str}), + 'url': ('fileUrl', {url_or_none}), + 'release_timestamp': ('episodeDate', {parse_iso8601}), + 'thumbnail': ('featuredImage', 0, 'filename', {str}, + {lambda x: x and f'https://rinse.imgix.net/media/{x}'}), + 'webpage_url': ('slug', {str}, + {lambda x: x and 
f'https://rinse.fm/episodes/{x}'}), + }), + 'vcodec': 'none', + 'extractor_key': RinseFMIE.ie_key(), + 'extractor': RinseFMIE.IE_NAME, + } + + +class RinseFMIE(RinseFMBaseIE): + _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/', + 'md5': '76ee0b719315617df42e15e710f46c7b', + 'info_dict': { + 'id': '1536535', + 'ext': 'mp3', + 'title': 'Club Glow - 15/12/2023 - 20:00', + 'thumbnail': r're:^https://.+\.(?:jpg|JPG)$', + 'release_timestamp': 1702598400, + 'release_date': '20231215' + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry'] + + return self._parse_entry(entry) + + +class RinseFMArtistPlaylistIE(RinseFMBaseIE): + _VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://rinse.fm/shows/resources/', + 'info_dict': { + 'id': 'resources', + 'title': '[re]sources', + 'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.' + }, + 'playlist_mincount': 40 + }, { + 'url': 'https://rinse.fm/shows/ivy/', + 'info_dict': { + 'id': 'ivy', + 'title': '[IVY]', + 'description': 'A dedicated space for DNB/Turbo House and 4x4.' + }, + 'playlist_mincount': 7 + }] + + def _entries(self, data): + for episode in traverse_obj(data, ( + 'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio) + ): + yield self._parse_entry(episode) + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + title = self._og_search_title(webpage) or self._html_search_meta('title', webpage) + description = self._og_search_description(webpage) or self._html_search_meta( + 'description', webpage) + data = self._search_nextjs_data(webpage, playlist_id) + + return self.playlist_result( + self._entries(data), playlist_id, title, description=description) diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py new file mode 100644 index 0000000..8d29b30 --- /dev/null +++ b/yt_dlp/extractor/rmcdecouverte.py @@ -0,0 +1,71 @@ +from .common import InfoExtractor +from .brightcove import BrightcoveLegacyIE +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) +from ..utils import smuggle_url + + +class RMCDecouverteIE(InfoExtractor): + _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)' + + _TESTS = [{ + 'url': 'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/', + 'info_dict': { + 'id': '6250879771001', + 'ext': 'mp4', + 'title': 'LES BUNKERS SECRETS D´OMAHA BEACH', + 'uploader_id': '1969646226001', + 'description': 'md5:aed573ca24abde62a148e0eba909657d', + 'timestamp': 1619622984, + 'upload_date': '20210428', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', + 'info_dict': { + 'id': '5983675500001', + 'ext': 'mp4', + 'title': 'CORVETTE', + 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', + 'uploader_id': '1969646226001', + 'upload_date': '20181226', + 'timestamp': 1545861635, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'only available for a week', + }, { + 'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598', 
+ 'only_matching': True, + }, { + # The website accepts any URL as long as it has _\d+ at the end + 'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598', + 'only_matching': True, + }, { + # live, geo restricted, bypassable + 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + display_id = mobj.group('id') or 'direct' + webpage = self._download_webpage(url, display_id) + brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) + if brightcove_legacy_url: + brightcove_id = compat_parse_qs(compat_urlparse.urlparse( + brightcove_legacy_url).query)['@videoPlayer'][0] + else: + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') + return self.url_result( + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['FR']}), + 'BrightcoveNew', brightcove_id) diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py new file mode 100644 index 0000000..1662243 --- /dev/null +++ b/yt_dlp/extractor/rockstargames.py @@ -0,0 +1,65 @@ +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, +) + + +class RockstarGamesIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.rockstargames.com/videos/video/11544/', + 'md5': '03b5caa6e357a4bd50e3143fc03e5733', + 'info_dict': { + 'id': '11544', + 'ext': 'mp4', + 'title': 'Further Adventures in Finance and Felony Trailer', + 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1464876000, + 'upload_date': '20160602', + } + }, { + 'url': 'http://www.rockstargames.com/videos#/?video=48', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json', + video_id, query={ + 'id': video_id, + 'locale': 'en_us', + })['video'] + + title = video['title'] + + formats = [] + for v in video['files_processed']['video/mp4']: + if not v.get('src'): + continue + resolution = v.get('resolution') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', resolution or '', 'height', default=None)) + formats.append({ + 'url': self._proto_relative_url(v['src']), + 'format_id': resolution, + 'height': height, + }) + + if not formats: + youtube_id = video.get('youtube_id') + if youtube_id: + return self.url_result(youtube_id, 'Youtube') + + return { + 'id': video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': self._proto_relative_url(video.get('screencap')), + 'timestamp': parse_iso8601(video.get('created')), + 'formats': formats, + } diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py new file mode 100644 index 0000000..5099f3a --- /dev/null +++ b/yt_dlp/extractor/rokfin.py @@ -0,0 +1,455 @@ +import itertools +import json +import re +import urllib.parse +from datetime import datetime + +from .common import InfoExtractor, SearchInfoExtractor +from ..utils import ( + ExtractorError, + determine_ext, + float_or_none, + format_field, + int_or_none, + str_or_none, + traverse_obj, + try_get, + unescapeHTML, + unified_timestamp, + url_or_none, + 
urlencode_postdata, +) + +_API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/' + + +class RokfinIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)' + _NETRC_MACHINE = 'rokfin' + _AUTH_BASE = 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect' + _access_mgmt_tokens = {} # OAuth 2.0: RFC 6749, Sec. 1.4-5 + _TESTS = [{ + 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change', + 'info_dict': { + 'id': 'post/57548', + 'ext': 'mp4', + 'title': 'Mitt Romney\'s Crazy Solution To Climate Change', + 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', + 'upload_date': '20211023', + 'timestamp': 1634998029, + 'channel': 'Jimmy Dore', + 'channel_id': '65429', + 'channel_url': 'https://rokfin.com/TheJimmyDoreShow', + 'availability': 'public', + 'live_status': 'not_live', + 'dislike_count': int, + 'like_count': int, + 'duration': 213, + } + }, { + 'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time', + 'info_dict': { + 'id': 'post/223', + 'ext': 'mp4', + 'title': 'Julian Assange Arrested: Streaming In Real Time', + 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', + 'upload_date': '20190412', + 'timestamp': 1555052644, + 'channel': 'Ron Placone', + 'channel_id': '10', + 'channel_url': 'https://rokfin.com/RonPlacone', + 'availability': 'public', + 'live_status': 'not_live', + 'dislike_count': int, + 'like_count': int, + 'tags': ['FreeThinkingMedia^', 'RealProgressives^'], + } + }, { + 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data', + 'info_dict': { + 'id': 'stream/10543', + 'ext': 'mp4', + 'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial Data', + 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', + 'description': 'md5:324ce2d3e3b62e659506409e458b9d8e', + 'channel': 'TLAVagabond', + 'channel_id': '53856', + 'channel_url': 'https://rokfin.com/TLAVagabond', + 'availability': 'public', + 'is_live': False, + 'was_live': True, + 'live_status': 'was_live', + 'timestamp': 1635874720, + 'release_timestamp': 1635874720, + 'release_date': '20211102', + 'upload_date': '20211102', + 'dislike_count': int, + 'like_count': int, + 'tags': ['FreeThinkingMedia^'], + } + }, { + 'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer', + 'info_dict': { + 'id': 'post/126703', + 'ext': 'mp4', + 'title': 'Brave New World - Aldous Huxley DEEPDIVE! 
(Chpts 1-3) - Quite Frankly & Jay Dyer', + 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', + 'channel': 'Jay Dyer', + 'channel_id': '186881', + 'channel_url': 'https://rokfin.com/jaydyer', + 'availability': 'premium_only', + 'live_status': 'not_live', + 'dislike_count': int, + 'like_count': int, + 'timestamp': 1678213357, + 'upload_date': '20230307', + 'tags': ['FreeThinkingMedia^', 'OpenMind^'], + 'description': 'md5:cb04e32e68326c9b2b251b297bacff35', + 'duration': 3100, + } + }, { + 'url': 'https://rokfin.com/stream/31332/The-Grayzone-live-on-Nordstream-blame-game', + 'info_dict': { + 'id': 'stream/31332', + 'ext': 'mp4', + 'title': 'The Grayzone live on Nordstream blame game', + 'thumbnail': r're:https://image\.v\.rokfin\.com/.+', + 'channel': 'Max Blumenthal', + 'channel_id': '248902', + 'channel_url': 'https://rokfin.com/MaxBlumenthal', + 'availability': 'premium_only', + 'live_status': 'was_live', + 'dislike_count': int, + 'like_count': int, + 'timestamp': 1678475166, + 'release_timestamp': 1678475166.0, + 'release_date': '20230310', + 'upload_date': '20230310', + 'tags': ['FreeThinkingMedia^'], + } + }] + + def _real_extract(self, url): + video_id, video_type = self._match_valid_url(url).group('id', 'type') + metadata = self._download_json_using_access_token(f'{_API_BASE_URL}{video_id}', video_id) + + scheduled = unified_timestamp(metadata.get('scheduledAt')) + live_status = ('was_live' if metadata.get('stoppedAt') + else 'is_upcoming' if scheduled + else 'is_live' if video_type == 'stream' + else 'not_live') + + video_url = traverse_obj(metadata, 'url', ('content', 'contentUrl'), expected_type=url_or_none) + if video_url in (None, 'fake.m3u8'): + video_url = format_field(self._search_regex( + r'https?://[^/]+/([^/]+)/storyboard.vtt', + traverse_obj(metadata, 'timelineUrl', ('content', 'timelineUrl'), expected_type=url_or_none), + video_id, default=None), None, 'https://stream.v.rokfin.com/%s.m3u8') + + formats, subtitles = [{'url': video_url}] if video_url else [], {} + if determine_ext(video_url) == 'm3u8': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, fatal=False, live=live_status == 'is_live') + + if not formats: + if traverse_obj(metadata, 'premiumPlan', 'premium'): + self.raise_login_required('This video is only available to premium users', True, method='cookies') + elif scheduled: + self.raise_no_formats( + f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}', + video_id=video_id, expected=True) + + uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username')) + timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000) + or unified_timestamp(metadata.get('creationDateTime'))) + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': str_or_none(traverse_obj(metadata, 'title', ('content', 'contentTitle'))), + 'duration': float_or_none(traverse_obj(metadata, ('content', 'duration'))), + 'thumbnail': url_or_none(traverse_obj(metadata, 'thumbnail', ('content', 'thumbnailUrl1'))), + 'description': str_or_none(traverse_obj(metadata, 'description', ('content', 'contentDescription'))), + 'like_count': int_or_none(metadata.get('likeCount')), + 'dislike_count': int_or_none(metadata.get('dislikeCount')), + 'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))), + 'channel_id': str_or_none(traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id'))), + 'channel_url': 
url_or_none(f'https://rokfin.com/{uploader}') if uploader else None, + 'timestamp': timestamp, + 'release_timestamp': timestamp if live_status != 'not_live' else None, + 'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none), + 'live_status': live_status, + 'availability': self._availability( + needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')), + is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False), + # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong + '__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None, + } + + def _get_comments(self, video_id): + pages_total = None + for page_n in itertools.count(): + raw_comments = self._download_json( + f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50', + video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}', + fatal=False) or {} + + for comment in raw_comments.get('content') or []: + yield { + 'text': str_or_none(comment.get('comment')), + 'author': str_or_none(comment.get('name')), + 'id': comment.get('commentId'), + 'author_id': comment.get('userId'), + 'parent': 'root', + 'like_count': int_or_none(comment.get('numLikes')), + 'dislike_count': int_or_none(comment.get('numDislikes')), + 'timestamp': unified_timestamp(comment.get('postedAt')) + } + + pages_total = int_or_none(raw_comments.get('totalPages')) or None + is_last = raw_comments.get('last') + if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False): + return + + def _perform_login(self, username, password): + # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 3.1) + login_page = self._download_webpage( + f'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid', + None, note='loading login page', errnote='error loading login page') + authentication_point_url = unescapeHTML(self._search_regex( + r'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"', + login_page, name='Authentication URL')) + + resp_body = self._download_webpage( + authentication_point_url, None, note='logging in', fatal=False, expected_status=404, + data=urlencode_postdata({'username': username, 'password': password, 'rememberMe': 'off', 'credentialId': ''})) + if not self._authentication_active(): + if re.search(r'(?i)(invalid\s+username\s+or\s+password)', resp_body or ''): + raise ExtractorError('invalid username/password', expected=True) + raise ExtractorError('Login failed') + + urlh = self._request_webpage( + f'{self._AUTH_BASE}/auth', None, + note='granting user authorization', errnote='user authorization rejected by Rokfin', + query={ + 'client_id': 'web', + 'prompt': 'none', + 'redirect_uri': 'https://rokfin.com/silent-check-sso.html', + 'response_mode': 'fragment', + 'response_type': 'code', + 'scope': 'openid', + }) + self._access_mgmt_tokens = self._download_json( + f'{self._AUTH_BASE}/token', None, + note='getting access credentials', errnote='error getting access credentials', + data=urlencode_postdata({ + 'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.url).fragment).get('code')[0], + 'client_id': 'web', + 'grant_type': 'authorization_code', + 'redirect_uri': 'https://rokfin.com/silent-check-sso.html' + })) + + def _authentication_active(self): + return not ( + 
{'KEYCLOAK_IDENTITY', 'KEYCLOAK_IDENTITY_LEGACY', 'KEYCLOAK_SESSION', 'KEYCLOAK_SESSION_LEGACY'} + - set(self._get_cookies(self._AUTH_BASE))) + + def _get_auth_token(self): + return try_get(self._access_mgmt_tokens, lambda x: ' '.join([x['token_type'], x['access_token']])) + + def _download_json_using_access_token(self, url_or_request, video_id, headers={}, query={}): + assert 'authorization' not in headers + headers = headers.copy() + auth_token = self._get_auth_token() + refresh_token = self._access_mgmt_tokens.get('refresh_token') + if auth_token: + headers['authorization'] = auth_token + + json_string, urlh = self._download_webpage_handle( + url_or_request, video_id, headers=headers, query=query, expected_status=401) + if not auth_token or urlh.status != 401 or refresh_token is None: + return self._parse_json(json_string, video_id) + + self._access_mgmt_tokens = self._download_json( + f'{self._AUTH_BASE}/token', video_id, + note='User authorization expired or canceled by Rokfin. Re-authorizing ...', errnote='Failed to re-authorize', + data=urlencode_postdata({ + 'grant_type': 'refresh_token', + 'refresh_token': refresh_token, + 'client_id': 'web' + })) + headers['authorization'] = self._get_auth_token() + if headers['authorization'] is None: + raise ExtractorError('User authorization lost', expected=True) + + return self._download_json(url_or_request, video_id, headers=headers, query=query) + + +class RokfinPlaylistBaseIE(InfoExtractor): + _TYPES = { + 'video': 'post', + 'audio': 'post', + 'stream': 'stream', + 'dead_stream': 'stream', + 'stack': 'stack', + } + + def _get_video_data(self, metadata): + for content in metadata.get('content') or []: + media_type = self._TYPES.get(content.get('mediaType')) + video_id = content.get('id') if media_type == 'post' else content.get('mediaId') + if not media_type or not video_id: + continue + + yield self.url_result(f'https://rokfin.com/{media_type}/{video_id}', video_id=f'{media_type}/{video_id}', + video_title=str_or_none(traverse_obj(content, ('content', 'contentTitle')))) + + +class RokfinStackIE(RokfinPlaylistBaseIE): + IE_NAME = 'rokfin:stack' + IE_DESC = 'Rokfin Stacks' + _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020', + 'playlist_count': 8, + 'info_dict': { + 'id': '271', + }, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + return self.playlist_result(self._get_video_data( + self._download_json(f'{_API_BASE_URL}stack/{list_id}', list_id)), list_id) + + +class RokfinChannelIE(RokfinPlaylistBaseIE): + IE_NAME = 'rokfin:channel' + IE_DESC = 'Rokfin Channels' + _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$' + _TESTS = [{ + 'url': 'https://rokfin.com/TheConvoCouch', + 'playlist_mincount': 100, + 'info_dict': { + 'id': '12071-new', + 'title': 'TheConvoCouch - New', + 'description': 'md5:bb622b1bca100209b91cd685f7847f06', + }, + }] + + _TABS = { + 'new': 'posts', + 'top': 'top', + 'videos': 'video', + 'podcasts': 'audio', + 'streams': 'stream', + 'stacks': 'stack', + } + + def _real_initialize(self): + self._validate_extractor_args() + + def _validate_extractor_args(self): + requested_tabs = self._configuration_arg('tab', None) + if requested_tabs is not None and (len(requested_tabs) > 1 or requested_tabs[0] not in self._TABS): + raise ExtractorError(f'Invalid extractor-arg "tab". 
Must be one of {", ".join(self._TABS)}', expected=True) + + def _entries(self, channel_id, channel_name, tab): + pages_total = None + for page_n in itertools.count(0): + if tab in ('posts', 'top'): + data_url = f'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50' + else: + data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}' + metadata = self._download_json( + data_url, channel_name, + note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}') + + yield from self._get_video_data(metadata) + pages_total = int_or_none(metadata.get('totalPages')) or None + is_last = metadata.get('last') + if is_last or (page_n > pages_total if pages_total else is_last is not False): + return + + def _real_extract(self, url): + channel_name = self._match_id(url) + channel_info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name) + channel_id = channel_info['id'] + tab = self._configuration_arg('tab', default=['new'])[0] + + return self.playlist_result( + self._entries(channel_id, channel_name, self._TABS[tab]), + f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description'))) + + +class RokfinSearchIE(SearchInfoExtractor): + IE_NAME = 'rokfin:search' + IE_DESC = 'Rokfin Search' + _SEARCH_KEY = 'rkfnsearch' + _TYPES = { + 'video': (('id', 'raw'), 'post'), + 'audio': (('id', 'raw'), 'post'), + 'stream': (('content_id', 'raw'), 'stream'), + 'dead_stream': (('content_id', 'raw'), 'stream'), + 'stack': (('content_id', 'raw'), 'stack'), + } + _TESTS = [{ + 'url': 'rkfnsearch5:"zelenko"', + 'playlist_count': 5, + 'info_dict': { + 'id': '"zelenko"', + 'title': '"zelenko"', + } + }] + _db_url = None + _db_access_key = None + + def _real_initialize(self): + self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None)) + if not self._db_url: + self._get_db_access_credentials() + + def _search_results(self, query): + total_pages = None + for page_number in itertools.count(1): + search_results = self._run_search_query( + query, data={'query': query, 'page': {'size': 100, 'current': page_number}}, + note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}') + total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none) + + for result in search_results.get('results') or []: + video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None)) + video_id = traverse_obj(result, video_id_key, expected_type=int_or_none) + if video_id and video_type: + yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}') + if not search_results.get('results'): + return + + def _run_search_query(self, video_id, data, **kwargs): + data = json.dumps(data).encode() + for attempt in range(2): + search_results = self._download_json( + self._db_url, video_id, data=data, fatal=(attempt == 1), + headers={'authorization': self._db_access_key}, **kwargs) + if search_results: + return search_results + self.write_debug('Updating access credentials') + self._get_db_access_credentials(video_id) + + def _get_db_access_credentials(self, video_id=None): + auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None} + notfound_err_page = self._download_webpage( + 'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page') + for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page): + 
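+            # Each bundled script is scanned for the search backend settings that
+            # the web client embeds as build-time constants; illustratively (not
+            # verbatim output), the minified JS contains fragments like:
+            #   REACT_APP_ENDPOINT_BASE:"https://<host>",REACT_APP_SEARCH_KEY:"search-..."
+            # which the regex below turns into the {SEARCH_KEY, ENDPOINT_BASE} pairs.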
js_content = self._download_webpage( + f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False) + auth_data.update(re.findall( + rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or '')) + if not all(auth_data.values()): + continue + + self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json') + self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}' + self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key)) + return + raise ExtractorError('Unable to extract access credentials') diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py new file mode 100644 index 0000000..5c62239 --- /dev/null +++ b/yt_dlp/extractor/roosterteeth.py @@ -0,0 +1,352 @@ +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + LazyList, + int_or_none, + join_nonempty, + parse_iso8601, + parse_qs, + smuggle_url, + str_or_none, + url_or_none, + urlencode_postdata, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class RoosterTeethBaseIE(InfoExtractor): + _NETRC_MACHINE = 'roosterteeth' + _API_BASE = 'https://svod-be.roosterteeth.com' + _API_BASE_URL = f'{_API_BASE}/api/v1' + + def _perform_login(self, username, password): + if self._get_cookies(self._API_BASE_URL).get('rt_access_token'): + return + + try: + self._download_json( + 'https://auth.roosterteeth.com/oauth/token', + None, 'Logging in', data=urlencode_postdata({ + 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', + 'grant_type': 'password', + 'username': username, + 'password': password, + })) + except ExtractorError as e: + msg = 'Unable to login' + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + resp = self._parse_json(e.cause.response.read().decode(), None, fatal=False) + if resp: + error = resp.get('extra_info') or resp.get('error_description') or resp.get('error') + if error: + msg += ': ' + error + self.report_warning(msg) + + def _extract_video_info(self, data): + thumbnails = [] + for image in traverse_obj(data, ('included', 'images')): + if image.get('type') not in ('episode_image', 'bonus_feature_image'): + continue + thumbnails.extend([{ + 'id': name, + 'url': url, + } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)]) + + attributes = data.get('attributes') or {} + title = traverse_obj(attributes, 'title', 'display_title') + sub_only = attributes.get('is_sponsors_only') + + episode_id = str_or_none(data.get('uuid')) + video_id = str_or_none(data.get('id')) + if video_id and 'parent_content_id' in attributes: # parent_content_id is a bonus-only key + video_id += '-bonus' # there are collisions with bonus ids and regular ids + elif not video_id: + video_id = episode_id + + return { + 'id': video_id, + 'display_id': attributes.get('slug'), + 'title': title, + 'description': traverse_obj(attributes, 'description', 'caption'), + 'series': traverse_obj(attributes, 'show_title', 'parent_content_title'), + 'season_number': int_or_none(attributes.get('season_number')), + 'season_id': str_or_none(attributes.get('season_id')), + 'episode': title, + 'episode_number': int_or_none(attributes.get('number')), + 'episode_id': episode_id, + 'channel_id': attributes.get('channel_id'), + 'duration': int_or_none(attributes.get('length')), + 'release_timestamp': parse_iso8601(attributes.get('original_air_date')), + 'thumbnails': thumbnails, + 'availability': 
self._availability( + needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only, + is_private=False, is_unlisted=False), + 'tags': attributes.get('genres') + } + + +class RoosterTeethIE(RoosterTeethBaseIE): + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:bonus-feature|episode|watch)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'info_dict': { + 'id': '9156', + 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'ext': 'mp4', + 'title': 'Million Dollars, But... The Game Announcement', + 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', + 'thumbnail': r're:^https?://.*\.png$', + 'series': 'Million Dollars, But...', + 'episode': 'Million Dollars, But... The Game Announcement', + 'tags': ['Game Show', 'Sketch'], + 'season_number': 2, + 'availability': 'public', + 'episode_number': 10, + 'episode_id': '00374575-464e-11e7-a302-065410f210c4', + 'season': 'Season 2', + 'season_id': 'ffa27d48-464d-11e7-a302-065410f210c4', + 'channel_id': '92b6bb21-91d2-4b1b-bf95-3268fa0d9939', + 'duration': 145, + 'release_timestamp': 1462982400, + 'release_date': '20160511', + }, + 'params': {'skip_download': True}, + }, { + 'url': 'https://roosterteeth.com/watch/rwby-bonus-25', + 'info_dict': { + 'id': '40432', + 'display_id': 'rwby-bonus-25', + 'title': 'Grimm', + 'description': 'md5:f30ff570741213418a8d2c19868b93ab', + 'episode': 'Grimm', + 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1', + 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', + 'ext': 'mp4', + 'availability': 'public', + 'episode_id': 'f8117b13-f068-499e-803e-eec9ea2dec8c', + 'episode_number': 3, + 'tags': ['Animation'], + 'season_id': '4b8f0a9e-12c4-41ed-8caa-fed15a85bab8', + 'season': 'Season 1', + 'series': 'RWBY: World of Remnant', + 'season_number': 1, + 'duration': 216, + 'release_timestamp': 1413489600, + 'release_date': '20141016', + }, + 'params': {'skip_download': True}, + }, { + # bonus feature with /watch/ url + 'url': 'https://roosterteeth.com/watch/rwby-bonus-21', + 'info_dict': { + 'id': '33-bonus', + 'display_id': 'rwby-bonus-21', + 'title': 'Volume 5 Yang Character Short', + 'description': 'md5:8c2440bc763ea90c52cfe0a68093e1f7', + 'episode': 'Volume 5 Yang Character Short', + 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1', + 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', + 'ext': 'mp4', + 'availability': 'public', + 'episode_id': 'f2a9f132-1fe2-44ad-8956-63d7c0267720', + 'episode_number': 55, + 'series': 'RWBY', + 'duration': 255, + 'release_timestamp': 1507993200, + 'release_date': '20171014', + }, + 'params': {'skip_download': True}, + }, { + # only works with video_data['attributes']['url'] m3u8 url + 'url': 'https://www.roosterteeth.com/watch/achievement-hunter-achievement-hunter-fatality-walkthrough-deathstroke-lex-luthor-captain-marvel-green-lantern-and-wonder-woman', + 'info_dict': { + 'id': '25394', + 'ext': 'mp4', + 'title': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman', + 'description': 'md5:91bb934698344fb9647b1c7351f16964', + 'availability': 'public', + 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', + 'episode': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman', + 'episode_number': 71, + 'episode_id': 'ffaec998-464d-11e7-a302-065410f210c4', + 'season': 'Season 2008', + 'tags': ['Gaming'], + 'series': 'Achievement Hunter', + 'display_id': 
'md5:4465ce4f001735f9d7a2ae529a543d31', + 'season_id': 'ffa13340-464d-11e7-a302-065410f210c4', + 'season_number': 2008, + 'channel_id': '2cb2a70c-be50-46f5-93d7-84a1baabb4f7', + 'duration': 189, + 'release_timestamp': 1228317300, + 'release_date': '20081203', + }, + 'params': {'skip_download': True}, + }, { + # brightcove fallback extraction needed + 'url': 'https://roosterteeth.com/watch/lets-play-2013-126', + 'info_dict': { + 'id': '17845', + 'ext': 'mp4', + 'title': 'WWE \'13', + 'availability': 'public', + 'series': 'Let\'s Play', + 'episode_number': 10, + 'season_id': 'ffa23d9c-464d-11e7-a302-065410f210c4', + 'channel_id': '75ba87e8-06fd-4482-bad9-52a4da2c6181', + 'episode': 'WWE \'13', + 'episode_id': 'ffdbe55e-464d-11e7-a302-065410f210c4', + 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', + 'tags': ['Gaming', 'Our Favorites'], + 'description': 'md5:b4a5226d2bbcf0dafbde11a2ba27262d', + 'display_id': 'lets-play-2013-126', + 'season_number': 3, + 'season': 'Season 3', + 'release_timestamp': 1359999840, + 'release_date': '20130204', + }, + 'expected_warnings': ['Direct m3u8 URL returned HTTP Error 403'], + 'params': {'skip_download': True}, + }, { + 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', + 'only_matching': True, + }, { + 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', + 'only_matching': True, + }, { + 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', + 'only_matching': True, + }, { + 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', + 'only_matching': True, + }, { + # only available for FIRST members + 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', + 'only_matching': True, + }, { + 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'only_matching': True, + }, { + 'url': 'https://roosterteeth.com/bonus-feature/camp-camp-soundtrack-another-rap-song-about-foreign-cars-richie-branson', + 'only_matching': True, + }] + + _BRIGHTCOVE_ACCOUNT_ID = '6203312018001' + + def _extract_brightcove_formats_and_subtitles(self, bc_id, url, m3u8_url): + account_id = self._search_regex( + r'/accounts/(\d+)/videos/', m3u8_url, 'account id', default=self._BRIGHTCOVE_ACCOUNT_ID) + info = self._downloader.get_info_extractor('BrightcoveNew').extract(smuggle_url( + f'https://players.brightcove.net/{account_id}/default_default/index.html?videoId={bc_id}', + {'referrer': url})) + return info['formats'], info['subtitles'] + + def _real_extract(self, url): + display_id = self._match_id(url) + api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}' + + try: + video_data = self._download_json( + api_episode_url + '/videos', display_id, 'Downloading video JSON metadata', + headers={'Client-Type': 'web'})['data'][0] # web client-type yields ad-free streams + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + if self._parse_json(e.cause.response.read().decode(), display_id).get('access') is False: + self.raise_login_required( + '%s is only available for FIRST members' % display_id) + raise + + # XXX: additional ad-free URL at video_data['links']['download'] but often gives 403 errors + m3u8_url = video_data['attributes']['url'] + is_brightcove = traverse_obj(video_data, 
('attributes', 'encoding_pipeline')) == 'brightcove' + bc_id = traverse_obj(video_data, ('attributes', 'uid', {str})) + + try: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') + except ExtractorError as e: + if is_brightcove and bc_id and isinstance(e.cause, HTTPError) and e.cause.status == 403: + self.report_warning( + 'Direct m3u8 URL returned HTTP Error 403; retrying with Brightcove extraction') + formats, subtitles = self._extract_brightcove_formats_and_subtitles(bc_id, url, m3u8_url) + else: + raise + + episode = self._download_json( + api_episode_url, display_id, + 'Downloading episode JSON metadata')['data'][0] + + return { + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + **self._extract_video_info(episode) + } + + +class RoosterTeethSeriesIE(RoosterTeethBaseIE): + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://roosterteeth.com/series/rwby?season=7', + 'playlist_count': 13, + 'info_dict': { + 'id': 'rwby-7', + 'title': 'RWBY - Season 7', + }, + }, { + 'url': 'https://roosterteeth.com/series/the-weird-place', + 'playlist_count': 7, + 'info_dict': { + 'id': 'the-weird-place', + 'title': 'The Weird Place', + }, + }, { + 'url': 'https://roosterteeth.com/series/role-initiative', + 'playlist_mincount': 16, + 'info_dict': { + 'id': 'role-initiative', + 'title': 'Role Initiative', + }, + }, { + 'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9', + 'playlist_mincount': 50, + 'info_dict': { + 'id': 'let-s-play-minecraft-9', + 'title': 'Let\'s Play Minecraft - Season 9', + }, + }] + + def _entries(self, series_id, season_number): + display_id = join_nonempty(series_id, season_number) + + def yield_episodes(data): + for episode in traverse_obj(data, ('data', lambda _, v: v['canonical_links']['self'])): + yield self.url_result( + urljoin('https://www.roosterteeth.com', episode['canonical_links']['self']), + RoosterTeethIE, **self._extract_video_info(episode)) + + series_data = self._download_json( + f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id) + for season_data in traverse_obj(series_data, ('data', lambda _, v: v['links']['episodes'])): + idx = traverse_obj(season_data, ('attributes', 'number')) + if season_number is not None and idx != season_number: + continue + yield from yield_episodes(self._download_json( + urljoin(self._API_BASE, season_data['links']['episodes']), display_id, + f'Downloading season {idx} JSON metadata', query={'per_page': 1000})) + + if season_number is None: # extract series-level bonus features + yield from yield_episodes(self._download_json( + f'{self._API_BASE_URL}/shows/{series_id}/bonus_features?order=asc&order_by&per_page=1000', + display_id, 'Downloading bonus features JSON metadata', fatal=False)) + + def _real_extract(self, url): + series_id = self._match_id(url) + season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none) + + entries = LazyList(self._entries(series_id, season_number)) + return self.playlist_result( + entries, + join_nonempty(series_id, season_number), + join_nonempty(entries[0].get('series'), season_number, delim=' - Season ')) diff --git a/yt_dlp/extractor/rottentomatoes.py b/yt_dlp/extractor/rottentomatoes.py new file mode 100644 index 0000000..e357175 --- /dev/null +++ b/yt_dlp/extractor/rottentomatoes.py @@ -0,0 +1,80 @@ +from .common import InfoExtractor +from ..utils import ( + 
ExtractorError, + clean_html, + float_or_none, + get_element_by_class, + join_nonempty, + traverse_obj, + url_or_none, +) + + +class RottenTomatoesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/(?P<playlist>[^/]+)(?:/(?P<tr>trailers)(?:/(?P<id>\w+))?)?' + + _TESTS = [{ + 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', + 'info_dict': { + 'id': '11028566', + 'ext': 'mp4', + 'title': 'Toy Story 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.' + }, + 'skip': 'No longer available', + }, { + 'url': 'https://www.rottentomatoes.com/m/toy_story_3/trailers/VycaVoBKhGuk', + 'info_dict': { + 'id': 'VycaVoBKhGuk', + 'ext': 'mp4', + 'title': 'Toy Story 3: Trailer 2', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 149.941 + }, + }, { + 'url': 'http://www.rottentomatoes.com/m/toy_story_3', + 'info_dict': { + 'id': 'toy_story_3', + 'title': 'Toy Story 3', + }, + 'playlist_mincount': 4, + }, { + 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers', + 'info_dict': { + 'id': 'toy_story_3-trailers', + }, + 'playlist_mincount': 5, + }] + + def _extract_videos(self, data, display_id): + for video in traverse_obj(data, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls')): + yield { + 'formats': self._extract_m3u8_formats( + video['file'], display_id, 'mp4', m3u8_id='hls', fatal=False), + **traverse_obj(video, { + 'id': 'publicId', + 'title': 'title', + 'description': 'description', + 'duration': ('durationInSeconds', {float_or_none}), + 'thumbnail': ('image', {url_or_none}), + }), + } + + def _real_extract(self, url): + playlist_id, trailers, video_id = self._match_valid_url(url).group('playlist', 'tr', 'id') + playlist_id = join_nonempty(playlist_id, trailers) + webpage = self._download_webpage(url, playlist_id) + data = self._search_json( + r'<script[^>]+\bid=["\'](?:heroV|v)ideos["\'][^>]*>', webpage, + 'data', playlist_id, contains_pattern=r'\[{(?s:.+)}\]') + + if video_id: + video_data = traverse_obj(data, lambda _, v: v['publicId'] == video_id) + if not video_data: + raise ExtractorError('Unable to extract video from webpage') + return next(self._extract_videos(video_data, video_id)) + + return self.playlist_result( + self._extract_videos(data, playlist_id), playlist_id, + clean_html(get_element_by_class('scoreboard__title', webpage))) diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py new file mode 100644 index 0000000..411a625 --- /dev/null +++ b/yt_dlp/extractor/rozhlas.py @@ -0,0 +1,363 @@ +import itertools + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + extract_attributes, + int_or_none, + remove_start, + str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class RozhlasIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://prehravac.rozhlas.cz/audio/3421320', + 'md5': '504c902dbc9e9a1fd50326eccf02a7e2', + 'info_dict': { + 'id': '3421320', + 'ext': 'mp3', + 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)', + 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let' + } + }, { + 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', + 'only_matching': True, + }] + + def _real_extract(self, url): + 
audio_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id) + + title = self._html_search_regex( + r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', + webpage, 'title', default=None) or remove_start( + self._og_search_title(webpage), 'Radio Wave - ') + description = self._html_search_regex( + r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', + webpage, 'description', fatal=False, group='url') + duration = int_or_none(self._search_regex( + r'data-duration=["\'](\d+)', webpage, 'duration', default=None)) + + return { + 'id': audio_id, + 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id, + 'title': title, + 'description': description, + 'duration': duration, + 'vcodec': 'none', + } + + +class RozhlasBaseIE(InfoExtractor): + def _extract_formats(self, entry, audio_id): + formats = [] + for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))): + ext = audio.get('variant') + for retry in self.RetryManager(): + if retry.attempt > 1: + self._sleep(1, audio_id) + try: + if ext == 'dash': + formats.extend(self._extract_mpd_formats( + audio['url'], audio_id, mpd_id=ext)) + elif ext == 'hls': + formats.extend(self._extract_m3u8_formats( + audio['url'], audio_id, 'm4a', m3u8_id=ext)) + else: + formats.append({ + 'url': audio['url'], + 'ext': ext, + 'format_id': ext, + 'abr': int_or_none(audio.get('bitrate')), + 'acodec': ext, + 'vcodec': 'none', + }) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 429: + retry.error = e.cause + else: + self.report_warning(e.msg) + + return formats + + +class RozhlasVltavaIE(RozhlasBaseIE): + _VALID_URL = r'https?://(?:\w+\.rozhlas|english\.radio)\.cz/[\w-]+-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337', + 'md5': 'ba2fdbc1242fc16771c7695d271ec355', + 'info_dict': { + 'id': '8891337', + 'title': 'md5:21f99739d04ab49d8c189ec711eef4ec', + }, + 'playlist_count': 1, + 'playlist': [{ + 'md5': 'ba2fdbc1242fc16771c7695d271ec355', + 'info_dict': { + 'id': '10520988', + 'ext': 'mp3', + 'title': 'Papej masíčko! Porcujeme a bilancujeme filmy a seriály, které to letos zabily', + 'description': 'md5:1c6d29fb9564e1f17fc1bb83ae7da0bc', + 'duration': 1574, + 'artist': 'Aleš Stuchlý', + 'channel_id': 'radio-wave', + }, + }] + }, { + 'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744', + 'info_dict': { + 'id': '8554744', + 'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko', + }, + 'playlist_count': 5, + 'playlist': [{ + 'md5': '93d4109cf8f40523699ae9c1d4600bdd', + 'info_dict': { + 'id': '9890713', + 'ext': 'mp3', + 'title': 'Neklid #1', + 'description': '1. díl: Neklid: 1. díl', + 'duration': 1025, + 'artist': 'Josef Kokta', + 'channel_id': 'radio-wave', + 'chapter': 'Neklid #1', + 'chapter_number': 1, + }, + }, { + 'md5': 'e9763235be4a6dcf94bc8a5bac1ca126', + 'info_dict': { + 'id': '9890716', + 'ext': 'mp3', + 'title': 'Neklid #2', + 'description': '2. díl: Neklid: 2. díl', + 'duration': 768, + 'artist': 'Josef Kokta', + 'channel_id': 'radio-wave', + 'chapter': 'Neklid #2', + 'chapter_number': 2, + }, + }, { + 'md5': '00b642ea94b78cc949ac84da09f87895', + 'info_dict': { + 'id': '9890722', + 'ext': 'mp3', + 'title': 'Neklid #3', + 'description': '3. díl: Neklid: 3. 
díl', + 'duration': 607, + 'artist': 'Josef Kokta', + 'channel_id': 'radio-wave', + 'chapter': 'Neklid #3', + 'chapter_number': 3, + }, + }, { + 'md5': 'faef97b1b49da7df874740f118c19dea', + 'info_dict': { + 'id': '9890728', + 'ext': 'mp3', + 'title': 'Neklid #4', + 'description': '4. díl: Neklid: 4. díl', + 'duration': 621, + 'artist': 'Josef Kokta', + 'channel_id': 'radio-wave', + 'chapter': 'Neklid #4', + 'chapter_number': 4, + }, + }, { + 'md5': '6e729fa39b647325b868d419c76f3efa', + 'info_dict': { + 'id': '9890734', + 'ext': 'mp3', + 'title': 'Neklid #5', + 'description': '5. díl: Neklid: 5. díl', + 'duration': 908, + 'artist': 'Josef Kokta', + 'channel_id': 'radio-wave', + 'chapter': 'Neklid #5', + 'chapter_number': 5, + }, + }] + }, { + 'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969', + 'info_dict': { + 'id': '8946969', + 'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '10631121', + 'ext': 'm4a', + 'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku', + 'description': 'Karel Šiktanc: Černý jezdec, bílý kůň', + 'duration': 2656, + 'artist': 'Tvůrčí skupina Drama a literatura', + 'channel_id': 'dvojka', + }, + }], + 'params': {'skip_download': 'dash'}, + }] + + def _extract_video(self, entry): + audio_id = entry['meta']['ga']['contentId'] + chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none})) + + return { + 'id': audio_id, + 'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None, + 'chapter_number': chapter_number, + 'formats': self._extract_formats(entry, audio_id), + **traverse_obj(entry, { + 'title': ('meta', 'ga', 'contentName'), + 'description': 'title', + 'duration': ('duration', {int_or_none}), + 'artist': ('meta', 'ga', 'contentAuthor'), + 'channel_id': ('meta', 'ga', 'contentCreator'), + }) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # FIXME: Use get_element_text_and_html_by_tag when it accepts less strict html + data = self._parse_json(extract_attributes(self._search_regex( + r'(<div class="mujRozhlasPlayer" data-player=\'[^\']+\'>)', + webpage, 'player'))['data-player'], video_id)['data'] + + return { + '_type': 'playlist', + 'id': str_or_none(data.get('embedId')) or video_id, + 'title': traverse_obj(data, ('series', 'title')), + 'entries': map(self._extract_video, data['playlist']), + } + + +class MujRozhlasIE(RozhlasBaseIE): + _VALID_URL = r'https?://(?:www\.)?mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + # single episode extraction + 'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli', + 'md5': '6f8fd68663e64936623e67c152a669e0', + 'info_dict': { + 'id': '10787730', + 'ext': 'mp3', + 'title': 'Ach jo, zase to telecí! 
Řízek je mnohem míň český, než jsme si mysleli', + 'description': 'md5:db7141e9caaedc9041ec7cefb9a62908', + 'timestamp': 1684915200, + 'modified_timestamp': 1687550432, + 'series': 'Vykopávky', + 'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg', + 'channel_id': 'radio-wave', + 'upload_date': '20230524', + 'modified_date': '20230623', + }, + }, { + # serial extraction + 'url': 'https://www.mujrozhlas.cz/radiokniha/jaroslava-janackova-pribeh-tajemneho-psani-o-pramenech-genezi-babicky', + 'playlist_mincount': 7, + 'info_dict': { + 'id': 'bb2b5f4e-ffb4-35a6-a34a-046aa62d6f6b', + 'title': 'Jaroslava Janáčková: Příběh tajemného psaní. O pramenech a genezi Babičky', + 'description': 'md5:7434d8fac39ac9fee6df098e11dfb1be', + }, + }, { + # show extraction + 'url': 'https://www.mujrozhlas.cz/nespavci', + 'playlist_mincount': 14, + 'info_dict': { + 'id': '09db9b37-d0f4-368c-986a-d3439f741f08', + 'title': 'Nespavci', + 'description': 'md5:c430adcbf9e2b9eac88b745881e814dc', + }, + }, { + # serialPart + 'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu', + 'info_dict': { + 'id': '8889035', + 'ext': 'm4a', + 'title': 'Gustavo Adolfo Bécquer: Hora duchů', + 'description': 'md5:343a15257b376c276e210b78e900ffea', + 'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera', + 'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg', + 'timestamp': 1708173000, + 'episode': 'Episode 1', + 'episode_number': 1, + 'series': 'Povídka', + 'modified_date': '20240217', + 'upload_date': '20240217', + 'modified_timestamp': 1708173198, + 'channel_id': 'vltava', + }, + 'params': {'skip_download': 'dash'}, + }] + + def _call_api(self, path, item_id, msg='API JSON'): + return self._download_json( + f'https://api.mujrozhlas.cz/{path}/{item_id}', item_id, + note=f'Downloading {msg}', errnote=f'Failed to download {msg}')['data'] + + def _extract_audio_entry(self, entry): + audio_id = entry['meta']['ga']['contentId'] + + return { + 'id': audio_id, + 'formats': self._extract_formats(entry['attributes'], audio_id), + **traverse_obj(entry, { + 'title': ('attributes', 'title'), + 'description': ('attributes', 'description'), + 'episode_number': ('attributes', 'part'), + 'series': ('attributes', 'mirroredShow', 'title'), + 'chapter': ('attributes', 'mirroredSerial', 'title'), + 'artist': ('meta', 'ga', 'contentAuthor'), + 'channel_id': ('meta', 'ga', 'contentCreator'), + 'timestamp': ('attributes', 'since', {unified_timestamp}), + 'modified_timestamp': ('attributes', 'updated', {unified_timestamp}), + 'thumbnail': ('attributes', 'asset', 'url', {url_or_none}), + }) + } + + def _entries(self, api_url, playlist_id): + for page in itertools.count(1): + episodes = self._download_json( + api_url, playlist_id, note=f'Downloading episodes page {page}', + errnote=f'Failed to download episodes page {page}', fatal=False) + for episode in traverse_obj(episodes, ('data', lambda _, v: v['meta']['ga']['contentId'])): + yield self._extract_audio_entry(episode) + api_url = traverse_obj(episodes, ('links', 'next', {url_or_none})) + if not api_url: + break + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + info = self._search_json(r'\bvar\s+dl\s*=', webpage, 'info json', display_id) + + entity = info['siteEntityBundle'] + + if entity in ('episode', 'serialPart'): + return self._extract_audio_entry(self._call_api( + 'episodes', 
info['contentId'], 'episode info API JSON')) + + elif entity in ('show', 'serial'): + playlist_id = info['contentShow'].split(':')[0] if entity == 'show' else info['contentId'] + data = self._call_api(f'{entity}s', playlist_id, f'{entity} playlist JSON') + api_url = data['relationships']['episodes']['links']['related'] + return self.playlist_result( + self._entries(api_url, playlist_id), playlist_id, + **traverse_obj(data, ('attributes', { + 'title': 'title', + 'description': 'description', + }))) + + else: + # `entity == 'person'` not implemented yet by API, ref: + # https://api.mujrozhlas.cz/persons/8367e456-2a57-379a-91bb-e699619bea49/participation + raise ExtractorError(f'Unsupported entity type "{entity}"') diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py new file mode 100644 index 0000000..7ba80d4 --- /dev/null +++ b/yt_dlp/extractor/rte.py @@ -0,0 +1,162 @@ +import re + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + float_or_none, + parse_iso8601, + str_or_none, + try_get, + unescapeHTML, + url_or_none, + ExtractorError, +) + + +class RteBaseIE(InfoExtractor): + def _real_extract(self, url): + item_id = self._match_id(url) + + info_dict = {} + formats = [] + + ENDPOINTS = ( + 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=', + 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=', + ) + + for num, ep_url in enumerate(ENDPOINTS, start=1): + try: + data = self._download_json(ep_url + item_id, item_id) + except ExtractorError as ee: + if num < len(ENDPOINTS) or formats: + continue + if isinstance(ee.cause, HTTPError) and ee.cause.status == 404: + error_info = self._parse_json(ee.cause.response.read().decode(), item_id, fatal=False) + if error_info: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_info['message']), + expected=True) + raise + + # NB the string values in the JSON are stored using XML escaping(!) 
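+            # A hedged illustration (invented title, not from the feed) of why
+            # unescapeHTML() is applied to these values below:
+            #   unescapeHTML('News &amp; Sport at 9')  ->  'News & Sport at 9'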
+ show = try_get(data, lambda x: x['shows'][0], dict) + if not show: + continue + + if not info_dict: + title = unescapeHTML(show['title']) + description = unescapeHTML(show.get('description')) + thumbnail = show.get('thumbnail') + duration = float_or_none(show.get('duration'), 1000) + timestamp = parse_iso8601(show.get('published')) + info_dict = { + 'id': item_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + } + + mg = try_get(show, lambda x: x['media:group'][0], dict) + if not mg: + continue + + if mg.get('url'): + m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) + if m: + m = m.groupdict() + formats.append({ + 'url': m['url'] + '/' + m['app'], + 'app': m['app'], + 'play_path': m['playpath'], + 'player_url': url, + 'ext': 'flv', + 'format_id': 'rtmp', + }) + + if mg.get('hls_server') and mg.get('hls_url'): + formats.extend(self._extract_m3u8_formats( + mg['hls_server'] + mg['hls_url'], item_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + + if mg.get('hds_server') and mg.get('hds_url'): + formats.extend(self._extract_f4m_formats( + mg['hds_server'] + mg['hds_url'], item_id, + f4m_id='hds', fatal=False)) + + mg_rte_server = str_or_none(mg.get('rte:server')) + mg_url = str_or_none(mg.get('url')) + if mg_rte_server and mg_url: + hds_url = url_or_none(mg_rte_server + mg_url) + if hds_url: + formats.extend(self._extract_f4m_formats( + hds_url, item_id, f4m_id='hds', fatal=False)) + + info_dict['formats'] = formats + return info_dict + + +class RteIE(RteBaseIE): + IE_NAME = 'rte' + IE_DESC = 'Raidió Teilifís Éireann TV' + _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', + 'md5': '4a76eb3396d98f697e6e8110563d2604', + 'info_dict': { + 'id': '10478715', + 'ext': 'mp4', + 'title': 'iWitness', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'The spirit of Ireland, one voice and one minute at a time.', + 'duration': 60.046, + 'upload_date': '20151012', + 'timestamp': 1444694160, + }, + } + + +class RteRadioIE(RteBaseIE): + IE_NAME = 'rte:radio' + IE_DESC = 'Raidió Teilifís Éireann radio' + # Radioplayer URLs have two distinct specifier formats, + # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>: + # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_ + # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. + # An <id> uniquely defines an individual recording, and is the only part we require. 
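+    # For example (both fragments taken from the tests below), the <id> group captures:
+    #   #!rii=16:10507902:2414:27-12-2015:  ->  '10507902'  (old style)
+    #   #!rii=b16_3250678_8861_06-04-2012_  ->  '3250678'   (new style)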
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)' + + _TESTS = [{ + # Old-style player URL; HLS and RTMPE formats + 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', + 'md5': 'c79ccb2c195998440065456b69760411', + 'info_dict': { + 'id': '10507902', + 'ext': 'mp4', + 'title': 'Gloria', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', + 'timestamp': 1451203200, + 'upload_date': '20151227', + 'duration': 7230.0, + }, + }, { + # New-style player URL; RTMPE formats only + 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', + 'info_dict': { + 'id': '3250678', + 'ext': 'flv', + 'title': 'The Lyric Concert with Paul Herriott', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': '', + 'timestamp': 1333742400, + 'upload_date': '20120406', + 'duration': 7199.016, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }] diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py new file mode 100644 index 0000000..07e1aa3 --- /dev/null +++ b/yt_dlp/extractor/rtl2.py @@ -0,0 +1,95 @@ +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class RTL2IE(InfoExtractor): + IE_NAME = 'rtl2' + _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', + 'info_dict': { + 'id': 'folge-203-0', + 'ext': 'f4v', + 'title': 'GRIP sucht den Sommerkönig', + 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f' + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }, { + 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', + 'info_dict': { + 'id': 'anna-erwischt-alex', + 'ext': 'mp4', + 'title': 'Anna erwischt Alex!', + 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.' 
+ }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }] + + def _real_extract(self, url): + vico_id, vivi_id, display_id = self._match_valid_url(url).groups() + if not vico_id: + webpage = self._download_webpage(url, display_id) + + mobj = re.search( + r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"', + webpage) + if mobj: + vico_id = mobj.group('vico_id') + vivi_id = mobj.group('vivi_id') + else: + vico_id = self._html_search_regex( + r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') + vivi_id = self._html_search_regex( + r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') + + info = self._download_json( + 'https://service.rtl2.de/api-player-vipo/video.php', + display_id, query={ + 'vico_id': vico_id, + 'vivi_id': vivi_id, + }) + video_info = info['video'] + title = video_info['titel'] + + formats = [] + + rtmp_url = video_info.get('streamurl') + if rtmp_url: + rtmp_url = rtmp_url.replace('\\', '') + stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL') + rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] + + formats.append({ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': stream_url, + 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf', + 'page_url': url, + 'flash_version': 'LNX 11,2,202,429', + 'rtmp_conn': rtmp_conn, + 'no_resume': True, + 'quality': 1, + }) + + m3u8_url = video_info.get('streamurl_hls') + if m3u8_url: + formats.extend(self._extract_akamai_formats(m3u8_url, display_id)) + + return { + 'id': display_id, + 'title': title, + 'thumbnail': video_info.get('image'), + 'description': video_info.get('beschreibung'), + 'duration': int_or_none(video_info.get('duration')), + 'formats': formats, + } diff --git a/yt_dlp/extractor/rtlnl.py b/yt_dlp/extractor/rtlnl.py new file mode 100644 index 0000000..724cb64 --- /dev/null +++ b/yt_dlp/extractor/rtlnl.py @@ -0,0 +1,294 @@ +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, +) + + +class RtlNlIE(InfoExtractor): + IE_NAME = 'rtl.nl' + IE_DESC = 'rtl.nl and rtlxl.nl' + _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)'] + _VALID_URL = r'''(?x) + https?://(?:(?:www|static)\.)? 
+ (?: + rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/| + rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)| + embed\.rtl\.nl/\#uuid= + ) + (?P<id>[0-9a-f-]+)''' + + _TESTS = [{ + # new URL schema + 'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f', + 'md5': '490428f1187b60d714f34e1f2e3af0b6', + 'info_dict': { + 'id': '0bd1384d-d970-3086-98bb-5c104e10c26f', + 'ext': 'mp4', + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1593293400, + 'upload_date': '20200627', + 'duration': 661.08, + }, + }, { + # old URL schema + 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416', + 'md5': '473d1946c1fdd050b2c0161a4b13c373', + 'info_dict': { + 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416', + 'ext': 'mp4', + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1461951000, + 'upload_date': '20160429', + 'duration': 1167.96, + }, + 'skip': '404', + }, { + # best format available a3t + 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', + 'md5': 'dea7474214af1271d91ef332fb8be7ea', + 'info_dict': { + 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed', + 'ext': 'mp4', + 'timestamp': 1424039400, + 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag', + 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', + 'upload_date': '20150215', + 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', + } + }, { + # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275) + # best format available nettv + 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false', + 'info_dict': { + 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a', + 'ext': 'mp4', + 'title': 'RTL Nieuws - Meer beelden van overval juwelier', + 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$', + 'timestamp': 1437233400, + 'upload_date': '20150718', + 'duration': 30.474, + }, + 'params': { + 'skip_download': True, + }, + }, { + # encrypted m3u8 streams, georestricted + 'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7', + 'only_matching': True, + }, { + 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0', + 'only_matching': True, + }, { + 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f', + 'only_matching': True, + }, { + 'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/', + 'only_matching': True, + }, { + 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl', + 'only_matching': True, + }, { + # new embed URL schema + 'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', + 'only_matching': True, + }] + + def _real_extract(self, url): + uuid = self._match_id(url) + info = self._download_json( + 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % 
uuid,
+            uuid)
+
+        material = info['material'][0]
+        title = info['abstracts'][0]['name']
+        subtitle = material.get('title')
+        if subtitle:
+            title += ' - %s' % subtitle
+        description = material.get('synopsis')
+
+        meta = info.get('meta', {})
+
+        videopath = material['videopath']
+        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
+
+        thumbnails = []
+
+        for p in ('poster_base_url', 'thumb_base_url'):
+            if not meta.get(p):
+                continue
+
+            thumbnails.append({
+                'url': self._proto_relative_url(meta[p] + uuid),
+                'width': int_or_none(self._search_regex(
+                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
+                'height': int_or_none(self._search_regex(
+                    r'/sz=[0-9]+x([0-9]+)',
+                    meta[p], 'thumbnail height', fatal=False))
+            })
+
+        return {
+            'id': uuid,
+            'title': title,
+            'formats': formats,
+            'timestamp': material['original_date'],
+            'description': description,
+            'duration': parse_duration(material.get('duration')),
+            'thumbnails': thumbnails,
+        }
+
+
+class RTLLuBaseIE(InfoExtractor):
+    _MEDIA_REGEX = {
+        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
+        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
+        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
+    }
+
+    def get_media_url(self, webpage, video_id, media_type):
+        return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)
+
+    def get_formats_and_subtitles(self, webpage, video_id):
+        video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')
+
+        formats, subtitles = [], {}
+        if video_url is not None:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
+        if audio_url is not None:
+            formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})
+
+        return formats, subtitles
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        is_live = video_id in ('live', 'live-2', 'lauschteren')
+
+        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
+        # we can get the context from the <rtl-comments context=<context>> tag in the webpage
+        webpage = self._download_webpage(url, video_id)
+
+        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
+            'is_live': is_live,
+        }
+
+
+class RTLLuTeleVODIE(RTLLuBaseIE):
+    IE_NAME = 'rtl.lu:tele-vod'
+    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
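+    # Illustrative matches (both URLs are from the tests below); each captures
+    # the numeric id:
+    #   https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html  ->  '3266757'
+    #   https://www.rtl.lu/video/3295215                                ->  '3295215'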
+ _TESTS = [{ + 'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html', + 'info_dict': { + 'id': '3266757', + 'title': 'Informatiounsversammlung Héichwaasser', + 'ext': 'mp4', + 'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg', + 'description': 'md5:b1db974408cc858c9fd241812e4a2a14', + } + }, { + 'url': 'https://www.rtl.lu/video/3295215', + 'info_dict': { + 'id': '3295215', + 'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht', + 'ext': 'mp4', + 'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg', + 'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b', + } + }] + + +class RTLLuArticleIE(RTLLuBaseIE): + IE_NAME = 'rtl.lu:article' + _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html' + _TESTS = [{ + # Audio-only + 'url': 'https://www.rtl.lu/sport/news/a/1934360.html', + 'info_dict': { + 'id': '1934360', + 'ext': 'mp3', + 'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg', + 'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7', + 'title': 'md5:40aa85f135578fbd549d3c9370321f99', + } + }, { + # 5minutes + 'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html', + 'info_dict': { + 'id': '1853173', + 'ext': 'mp4', + 'description': 'md5:ac031da0740e997a5cf4633173634fee', + 'title': 'md5:87e17722ed21af0f24be3243f4ec0c46', + 'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg', + } + }, { + # today.lu + 'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html', + 'info_dict': { + 'id': '1936203', + 'ext': 'mp4', + 'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower', + 'description': 'The witchy theme continues in the latest episode of Once Upon A Time...', + 'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg', + } + }] + + +class RTLLuLiveIE(RTLLuBaseIE): + _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)' + _TESTS = [{ + # Tele:live + 'url': 'https://www.rtl.lu/tele/live', + 'info_dict': { + 'id': 'live', + 'ext': 'mp4', + 'live_status': 'is_live', + 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg', + } + }, { + # Tele:live-2 + 'url': 'https://www.rtl.lu/tele/live-2', + 'info_dict': { + 'id': 'live-2', + 'ext': 'mp4', + 'live_status': 'is_live', + 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg', + } + }, { + # Radio:lauschteren + 'url': 'https://www.rtl.lu/radio/lauschteren', + 'info_dict': { + 'id': 'lauschteren', + 'ext': 'mp4', + 'live_status': 'is_live', + 'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg', + } + }] + + +class RTLLuRadioIE(RTLLuBaseIE): + _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?' 
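+    # Per the base-class regexes above, radio show pages expose an
+    # <rtl-audioplayer src="..."> tag rather than an <rtl-player hls="..."> one,
+    # so get_formats_and_subtitles() returns a single direct mp3 format here
+    # (hence 'ext': 'mp3' in the test below).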
+ _TESTS = [{ + 'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html', + 'info_dict': { + 'id': '4033058', + 'ext': 'mp3', + 'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9', + 'title': '5 vir 12 - Stau um Stau', + 'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg', + } + }] diff --git a/yt_dlp/extractor/rtnews.py b/yt_dlp/extractor/rtnews.py new file mode 100644 index 0000000..6be9945 --- /dev/null +++ b/yt_dlp/extractor/rtnews.py @@ -0,0 +1,196 @@ +import re + +from .common import InfoExtractor +from ..utils import js_to_json + + +class RTNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rt\.com/[^/]+/(?:[^/]+/)?(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://www.rt.com/sport/546301-djokovic-arrives-belgrade-crowds/', + 'playlist_mincount': 2, + 'info_dict': { + 'id': '546301', + 'title': 'Crowds gather to greet deported Djokovic as he returns to Serbia (VIDEO)', + 'description': 'md5:1d5bfe1a988d81fd74227cfdf93d314d', + 'thumbnail': 'https://cdni.rt.com/files/2022.01/article/61e587a085f540102c3386c1.png' + }, + }, { + 'url': 'https://www.rt.com/shows/in-question/535980-plot-to-assassinate-julian-assange/', + 'playlist_mincount': 1, + 'info_dict': { + 'id': '535980', + 'title': 'The plot to assassinate Julian Assange', + 'description': 'md5:55279ce5e4441dc1d16e2e4a730152cd', + 'thumbnail': 'https://cdni.rt.com/files/2021.09/article/615226f42030274e8879b53d.png' + }, + 'playlist': [{ + 'info_dict': { + 'id': '6152271d85f5400464496162', + 'ext': 'mp4', + 'title': '6152271d85f5400464496162', + }, + }] + }] + + def _entries(self, webpage): + video_urls = set(re.findall(r'https://cdnv\.rt\.com/.*[a-f0-9]+\.mp4', webpage)) + for v_url in video_urls: + v_id = re.search(r'([a-f0-9]+)\.mp4', v_url).group(1) + if v_id: + yield { + 'id': v_id, + 'title': v_id, + 'url': v_url, + } + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + + return { + '_type': 'playlist', + 'id': id, + 'entries': self._entries(webpage), + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } + + +class RTDocumentryIE(InfoExtractor): + _VALID_URL = r'https?://rtd\.rt\.com/(?:(?:series|shows)/[^/]+|films)/(?P<id>[^/?$&#]+)' + + _TESTS = [{ + 'url': 'https://rtd.rt.com/films/escobars-hitman/', + 'info_dict': { + 'id': 'escobars-hitman', + 'ext': 'mp4', + 'title': "Escobar's Hitman. Former drug-gang killer, now loved and loathed in Colombia", + 'description': 'md5:647c76984b7cb9a8b52a567e87448d88', + 'thumbnail': 'https://cdni.rt.com/rtd-files/films/escobars-hitman/escobars-hitman_11.jpg', + 'average_rating': 8.53, + 'duration': 3134.0 + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/iskander-tactical-system-natos-headache/', + 'info_dict': { + 'id': 'iskander-tactical-system-natos-headache', + 'ext': 'mp4', + 'title': "Iskander tactical system. NATO's headache | The Kalashnikova Show. 
Episode 10", + 'description': 'md5:da7c24a0aa67bc2bb88c86658508ca87', + 'thumbnail': 'md5:89de8ce38c710b7c501ff02d47e2aa89', + 'average_rating': 9.27, + 'duration': 274.0, + 'timestamp': 1605726000, + 'view_count': int, + 'upload_date': '20201118' + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/introduction-to-safe-digital-life-ep2/', + 'info_dict': { + 'id': 'introduction-to-safe-digital-life-ep2', + 'ext': 'mp4', + 'title': 'How to Keep your Money away from Hackers | I am Hacked. Episode 2', + 'description': 'md5:c46fa9a5af86c0008c45a3940a8cce87', + 'thumbnail': 'md5:a5e81b9bf5aed8f5e23d9c053601b825', + 'average_rating': 10.0, + 'duration': 1524.0, + 'timestamp': 1636977600, + 'view_count': int, + 'upload_date': '20211115' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + ld_json = self._search_json_ld(webpage, None, fatal=False) + if not ld_json: + self.raise_no_formats('No video/audio found at the provided url.', expected=True) + media_json = self._parse_json( + self._search_regex(r'(?s)\'Med\'\s*:\s*\[\s*({.+})\s*\]\s*};', webpage, 'media info'), + id, transform_source=js_to_json) + if 'title' not in ld_json and 'title' in media_json: + ld_json['title'] = media_json['title'] + formats = [{'url': src['file']} for src in media_json.get('sources') or [] if src.get('file')] + + return { + 'id': id, + 'thumbnail': media_json.get('image'), + 'formats': formats, + **ld_json + } + + +class RTDocumentryPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://rtd\.rt\.com/(?:series|shows)/(?P<id>[^/]+)/$' + + _TESTS = [{ + 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/', + 'playlist_mincount': 6, + 'info_dict': { + 'id': 'i-am-hacked-trailer', + }, + }, { + 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/', + 'playlist_mincount': 34, + 'info_dict': { + 'id': 'the-kalashnikova-show-military-secrets-anna-knishenko', + }, + }] + + def _entries(self, webpage, id): + video_urls = set(re.findall(r'list-2__link\s*"\s*href="([^"]+)"', webpage)) + for v_url in video_urls: + if id not in v_url: + continue + yield self.url_result( + 'https://rtd.rt.com%s' % v_url, + ie=RTDocumentryIE.ie_key()) + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + + return { + '_type': 'playlist', + 'id': id, + 'entries': self._entries(webpage, id), + } + + +class RuptlyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruptly\.tv/[a-z]{2}/videos/(?P<id>\d+-\d+)' + + _TESTS = [{ + 'url': 'https://www.ruptly.tv/en/videos/20220112-020-Japan-Double-trouble-Tokyo-zoo-presents-adorable-panda-twins', + 'info_dict': { + 'id': '20220112-020', + 'ext': 'mp4', + 'title': 'Japan: Double trouble! 
Tokyo zoo presents adorable panda twins | Video Ruptly', + 'description': 'md5:85a8da5fdb31486f0562daf4360ce75a', + 'thumbnail': 'https://storage.ruptly.tv/thumbnails/20220112-020/i6JQKnTNpYuqaXsR/i6JQKnTNpYuqaXsR.jpg' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + m3u8_url = self._search_regex(r'preview_url"\s?:\s?"(https?://storage\.ruptly\.tv/video_projects/.+\.m3u8)"', webpage, 'm3u8 url', fatal=False) + if not m3u8_url: + self.raise_no_formats('No video/audio found at the provided url.', expected=True) + formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, id, ext='mp4') + return { + 'id': id, + 'formats': formats, + 'subtitles': subs, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py new file mode 100644 index 0000000..5928a20 --- /dev/null +++ b/yt_dlp/extractor/rtp.py @@ -0,0 +1,97 @@ +from .common import InfoExtractor +from ..utils import js_to_json +import re +import json +import urllib.parse +import base64 + + +class RTPIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' + _TESTS = [{ + 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', + 'md5': 'e736ce0c665e459ddb818546220b4ef8', + 'info_dict': { + 'id': 'e174042', + 'ext': 'mp3', + 'title': 'Paixões Cruzadas', + 'description': 'As paixões musicais de António Cartaxo e António Macedo', + 'thumbnail': r're:^https?://.*\.jpg', + }, + }, { + 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', + 'only_matching': True, + }] + + _RX_OBFUSCATION = re.compile(r'''(?xs) + atob\s*\(\s*decodeURIComponent\s*\(\s* + (\[[0-9A-Za-z%,'"]*\]) + \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\) + ''') + + def __unobfuscate(self, data, *, video_id): + if data.startswith('{'): + data = self._RX_OBFUSCATION.sub( + lambda m: json.dumps( + base64.b64decode(urllib.parse.unquote( + ''.join(self._parse_json(m.group(1), video_id)) + )).decode('iso-8859-1')), + data) + return js_to_json(data) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._html_search_meta( + 'twitter:title', webpage, display_name='title', fatal=True) + + f, config = self._search_regex( + r'''(?sx) + var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s* + var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/) + ''', webpage, + 'player config', group=('f', 'config')) + + f = self._parse_json( + f, video_id, + lambda data: self.__unobfuscate(data, video_id=video_id)) + config = self._parse_json( + config, video_id, + lambda data: self.__unobfuscate(data, video_id=video_id)) + + formats = [] + if isinstance(f, dict): + f_hls = f.get('hls') + if f_hls is not None: + formats.extend(self._extract_m3u8_formats( + f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')) + + f_dash = f.get('dash') + if f_dash is not None: + formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash')) + else: + formats.append({ + 'format_id': 'f', + 'url': f, + 'vcodec': 'none' if config.get('mediaType') == 'audio' else None, + }) + + subtitles = {} + + vtt = config.get('vtt') + if vtt is not None: + for lcode, lname, url in vtt: + subtitles.setdefault(lcode, []).append({ + 'name': lname, + 'url': url, + }) + + return { + 'id': video_id, + 'title': title, 
+ 'formats': formats, + 'description': self._html_search_meta(['description', 'twitter:description'], webpage), + 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage), + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py new file mode 100644 index 0000000..7381d82 --- /dev/null +++ b/yt_dlp/extractor/rtrfm.py @@ -0,0 +1,65 @@ +from .common import InfoExtractor + + +class RTRFMIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)' + _TESTS = [ + { + 'url': 'https://rtrfm.com.au/shows/breakfast/', + 'md5': '46168394d3a5ce237cf47e85d0745413', + 'info_dict': { + 'id': 'breakfast-2021-11-16', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + 'skip': 'ID and md5 changes daily', + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/', + 'md5': '396bedf1e40f96c62b30d4999202a790', + 'info_dict': { + 'id': 'breakfast-2021-11-11', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2021-11-11', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/', + 'md5': '594027f513ec36a24b15d65007a24dff', + 'info_dict': { + 'id': 'breakfast-2020-06-01', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2020-06-01', + 'description': r're:^Breakfast with Taylah ', + }, + 'skip': 'This audio has expired', + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + show, date, title = self._search_regex( + r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''', + webpage, 'details', group=('show', 'date', 'title')) + url = self._download_json( + 'https://restreams.rtrfm.com.au/rzz', + show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u'] + # This is the only indicator of an error until trying to download the URL and + # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing). 
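+        # i.e. a payload like {'u': 'https://.../show-2020-06-01.mp4'} (illustrative
+        # shape, not a recorded response) signals an expired or absent episode,
+        # while a playable one comes back as an .mp3 URL.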
+ if '.mp4' in url: + url = None + self.raise_no_formats('Expired or no episode on this date', expected=True) + return { + 'id': '%s-%s' % (show, date), + 'title': '%s %s' % (title, date), + 'series': title, + 'url': url, + 'release_date': date, + 'description': self._og_search_description(webpage), + } diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py new file mode 100644 index 0000000..bce5cba --- /dev/null +++ b/yt_dlp/extractor/rts.py @@ -0,0 +1,232 @@ +import re + +from .srgssr import SRGSSRIE +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + parse_duration, + parse_iso8601, + unescapeHTML, + urljoin, +) + + +class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE + _WORKING = False + IE_DESC = 'RTS.ch' + _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html' + + _TESTS = [ + { + 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', + 'md5': '753b877968ad8afaeddccc374d4256a5', + 'info_dict': { + 'id': '3449373', + 'display_id': 'les-enfants-terribles', + 'ext': 'mp4', + 'duration': 1488, + 'title': 'Les Enfants Terribles', + 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.', + 'uploader': 'Divers', + 'upload_date': '19680921', + 'timestamp': -40280400, + 'thumbnail': r're:^https?://.*\.image', + 'view_count': int, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }, + { + 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', + 'info_dict': { + 'id': '5624065', + 'title': 'Passe-moi les jumelles', + }, + 'playlist_mincount': 4, + }, + { + 'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html', + 'info_dict': { + 'id': '5745975', + 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski', + 'ext': 'mp4', + 'duration': 48, + 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski', + 'description': 'Hockey - Playoff', + 'uploader': 'Hockey', + 'upload_date': '20140403', + 'timestamp': 1396556882, + 'thumbnail': r're:^https?://.*\.image', + 'view_count': int, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + 'skip': 'Blocked outside Switzerland', + }, + { + 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', + 'md5': '9bb06503773c07ce83d3cbd793cebb91', + 'info_dict': { + 'id': '5745356', + 'display_id': 'londres-cachee-par-un-epais-smog', + 'ext': 'mp4', + 'duration': 33, + 'title': 'Londres cachée par un épais smog', + 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.', + 'uploader': 'L\'actu en vidéo', + 'upload_date': '20140403', + 'timestamp': 1396537322, + 'thumbnail': r're:^https?://.*\.image', + 'view_count': int, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }, + { + 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', + 'md5': 'dd8ef6a22dff163d063e2a52bc8adcae', + 'info_dict': { + 'id': '5706148', + 'display_id': 'urban-hippie-de-damien-krisl-03-04-2014', + 'ext': 'mp3', + 'duration': 123, + 
'title': '"Urban Hippie", de Damien Krisl', + 'description': 'Des Hippies super glam.', + 'upload_date': '20140403', + 'timestamp': 1396551600, + }, + }, + { + # article with videos on rhs + 'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html', + 'info_dict': { + 'id': '6693917', + 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse', + }, + 'playlist_mincount': 5, + }, + { + 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + m = self._match_valid_url(url) + media_id = m.group('rts_id') or m.group('id') + display_id = m.group('display_id') or media_id + + def download_json(internal_id): + return self._download_json( + 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id, + display_id) + + all_info = download_json(media_id) + + # media_id extracted out of URL is not always a real id + if 'video' not in all_info and 'audio' not in all_info: + entries = [] + + for item in all_info.get('items', []): + item_url = item.get('url') + if not item_url: + continue + entries.append(self.url_result(item_url, 'RTS')) + + if not entries: + page, urlh = self._download_webpage_handle(url, display_id) + if re.match(self._VALID_URL, urlh.url).group('id') != media_id: + return self.url_result(urlh.url, 'RTS') + + # article with videos on rhs + videos = re.findall( + r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"', + page) + if not videos: + videos = re.findall( + r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', + page) + if videos: + entries = [self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos] + + if entries: + return self.playlist_result(entries, media_id, all_info.get('title')) + + internal_id = self._html_search_regex( + r'<(?:video|audio) data-id="([0-9]+)"', page, + 'internal video id') + all_info = download_json(internal_id) + + media_type = 'video' if 'video' in all_info else 'audio' + + # check for errors + self._get_media_data('rts', media_type, media_id) + + info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] + + title = info['title'] + + def extract_bitrate(url): + return int_or_none(self._search_regex( + r'-([0-9]+)k\.', url, 'bitrate', default=None)) + + formats = [] + streams = info.get('streams', {}) + for format_id, format_url in streams.items(): + if format_id == 'hds_sd' and 'hds' in streams: + continue + if format_id == 'hls_sd' and 'hls' in streams: + continue + ext = determine_ext(format_url) + if ext in ('m3u8', 'f4m'): + format_url = self._get_tokenized_src(format_url, media_id, format_id) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url + ('?' if '?' 
not in format_url else '&') + 'hdcore=3.4.0', + media_id, f4m_id=format_id, fatal=False)) + else: + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) + else: + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'tbr': extract_bitrate(format_url), + }) + + download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '') + for media in info.get('media', []): + media_url = media.get('url') + if not media_url or re.match(r'https?://', media_url): + continue + rate = media.get('rate') + ext = media.get('ext') or determine_ext(media_url, 'mp4') + format_id = ext + if rate: + format_id += '-%dk' % rate + formats.append({ + 'format_id': format_id, + 'url': urljoin(download_base, media_url), + 'tbr': rate or extract_bitrate(media_url), + }) + + self._check_formats(formats, media_id) + + duration = info.get('duration') or info.get('cutout') or info.get('cutduration') + if isinstance(duration, compat_str): + duration = parse_duration(duration) + + return { + 'id': media_id, + 'display_id': display_id, + 'formats': formats, + 'title': title, + 'description': info.get('intro'), + 'duration': duration, + 'view_count': int_or_none(info.get('plays')), + 'uploader': info.get('programName'), + 'timestamp': parse_iso8601(info.get('broadcast_date')), + 'thumbnail': unescapeHTML(info.get('preview_image_url')), + } diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py new file mode 100644 index 0000000..741c472 --- /dev/null +++ b/yt_dlp/extractor/rtvcplay.py @@ -0,0 +1,285 @@ +import re + +from .common import InfoExtractor, ExtractorError +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + float_or_none, + js_to_json, + mimetype2ext, + traverse_obj, + urljoin, + url_or_none, +) + + +class RTVCPlayBaseIE(InfoExtractor): + _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co' + + def _extract_player_config(self, webpage, video_id): + return self._search_json( + r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*=', re.sub(r'"\s*\+\s*"', '', webpage), + 'player_config', video_id, transform_source=js_to_json) + + def _extract_formats_and_subtitles_player_config(self, player_config, video_id): + formats, subtitles = [], {} + for source in traverse_obj(player_config, ('sources', ..., lambda _, v: url_or_none(v['url']))): + ext = mimetype2ext(source.get('mimetype'), default=determine_ext(source['url'])) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + source['url'], video_id, 'mp4', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': source['url'], + 'ext': ext, + }) + + return formats, subtitles + + +class RTVCPlayIE(RTVCPlayBaseIE): + _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)' + + _TESTS = [{ + 'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional', + 'info_dict': { + 'id': 'canal-institucional', + 'title': r're:^Canal Institucional', + 'description': 'md5:eff9e548394175928059320c006031ea', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'live_status': 'is_live', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia', + 'info_dict': { + 'id': 'senal-colombia', + 'title': r're:^Señal Colombia', + 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'live_status': 'is_live', + 
'ext': 'mp4', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional', + 'info_dict': { + 'id': 'radio-nacional', + 'title': r're:^Radio Nacional', + 'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'live_status': 'is_live', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }, { + 'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas', + 'md5': '1288ee6f6d1330d880f98bff2ed710a3', + 'info_dict': { + 'id': 'senoritas', + 'title': 'Señoritas', + 'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022', + 'md5': 'f040a7380a269ad633cf837384d5e9fc', + 'info_dict': { + 'id': 'james-regresa-clases-28022022', + 'title': 'James regresa a clases - 28/02/2022', + 'description': 'md5:c5dcdf757c7ab29305e8763c6007e675', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo', + 'info_dict': { + 'id': 'llinas-el-cerebro-y-el-universo', + 'title': 'Llinás, el cerebro y el universo', + 'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 3, + }, { + 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa', + 'info_dict': { + 'id': 'profe-en-tu-casa', + 'title': 'Profe en tu casa', + 'description': 'md5:47dbe20e263194413b1db2a2805a4f2e', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 537, + }, { + 'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura', + 'info_dict': { + 'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura', + 'title': 'Relato de un náufrago: una travesía del periodismo a la literatura', + 'description': 'md5:6da28fdca4a5a568ea47ef65ef775603', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 5, + }, { + 'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones', + 'info_dict': { + 'id': 'diez-versiones', + 'title': 'Diez versiones', + 'description': 'md5:997471ed971cb3fd8e41969457675306', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'playlist_mincount': 20, + }] + + def _real_extract(self, url): + video_id, category = self._match_valid_url(url).group('id', 'category') + webpage = self._download_webpage(url, video_id) + + hydration = self._search_json( + r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration', + video_id, transform_source=js_to_json)['content']['currentContent'] + + asset_id = traverse_obj(hydration, ('video', 'assetid')) + if asset_id: + hls_url = hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id) + else: + hls_url = traverse_obj(hydration, ('channel', 'hls')) + + metadata = traverse_obj(hydration, { + 'title': 'title', + 'description': 'description', + 'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'), + }, get_all=False) + + # Probably it's a program's page + if not hls_url: + seasons = traverse_obj( + hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'), + get_all=False) + if not seasons: + podcast_episodes = hydration.get('audios') + if not podcast_episodes: + raise ExtractorError('Could not find asset_id nor 
program playlist nor podcast episodes') + + return self.playlist_result([ + self.url_result(episode['file'], url_transparent=True, **traverse_obj(episode, { + 'title': 'title', + 'description': ('description', {clean_html}), + 'episode_number': ('chapter_number', {float_or_none}, {int_or_none}), + 'season_number': ('season', {int_or_none}), + })) for episode in podcast_episodes], video_id, **metadata) + + entries = [self.url_result( + urljoin(url, episode['slug']), url_transparent=True, + **traverse_obj(season, { + 'season': 'title', + 'season_number': ('season', {int_or_none}), + }), **traverse_obj(episode, { + 'title': 'title', + 'thumbnail': ('image', 'cover', 'path'), + 'episode_number': ('chapter_number', {int_or_none}), + })) for season in seasons for episode in traverse_obj(season, ('contents', ...))] + + return self.playlist_result(entries, video_id, **metadata) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': category == 'en-vivo', + **metadata, + } + + +class RTVCPlayEmbedIE(RTVCPlayBaseIE): + _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)' + + _TESTS = [{ + 'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9', + 'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8', + 'info_dict': { + 'id': '72b0e699-248b-4929-a4a8-3782702fa7f9', + 'title': 'Tráiler: Señoritas', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'ext': 'mp4', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + player_config = self._extract_player_config(webpage, video_id) + formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id) + + asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid')) + metadata = {} if not asset_id else self._download_json( + f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False) + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(metadata, { + 'title': 'title', + 'description': 'description', + 'thumbnail': ('image', ..., 'thumbnail', 'path'), + }, get_all=False) + } + + +class RTVCKalturaIE(RTVCPlayBaseIE): + _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)' + + _TESTS = [{ + 'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html', + 'info_dict': { + 'id': 'indexSC', + 'title': r're:^Señal Colombia', + 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'live_status': 'is_live', + 'ext': 'mp4', + }, + 'params': { + 'skip_download': 'Livestream', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + player_config = self._extract_player_config(webpage, video_id) + formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id) + + channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId')) + metadata = {} if not channel_id else self._download_json( + f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False) + + fmts, subs = self._extract_m3u8_formats_and_subtitles( + traverse_obj(metadata, ('channel', 'hls')), video_id, 'mp4', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': 
True, + **traverse_obj(metadata, { + 'title': 'title', + 'description': 'description', + 'thumbnail': ('channel', 'image', 'logo', 'path'), + }) + } diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py new file mode 100644 index 0000000..a99a266 --- /dev/null +++ b/yt_dlp/extractor/rtve.py @@ -0,0 +1,344 @@ +import base64 +import io +import struct + +from .common import InfoExtractor +from ..compat import compat_b64decode +from ..utils import ( + ExtractorError, + determine_ext, + float_or_none, + qualities, + remove_end, + remove_start, + try_get, +) + + +class RTVEALaCartaIE(InfoExtractor): + IE_NAME = 'rtve.es:alacarta' + IE_DESC = 'RTVE a la carta' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', + 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', + 'info_dict': { + 'id': '2491869', + 'ext': 'mp4', + 'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', + 'duration': 5024.566, + 'series': 'Balonmano', + }, + 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], + }, { + 'note': 'Live stream', + 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', + 'info_dict': { + 'id': '1694255', + 'ext': 'mp4', + 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': 'live stream', + }, + }, { + 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', + 'md5': 'd850f3c8731ea53952ebab489cf81cbf', + 'info_dict': { + 'id': '4236788', + 'ext': 'mp4', + 'title': 'Servir y proteger - Capítulo 104', + 'duration': 3222.0, + }, + 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], + }, { + 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', + 'only_matching': True, + }, { + 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', + 'only_matching': True, + }] + + def _real_initialize(self): + user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8') + self._manager = self._download_json( + 'http://www.rtve.es/odin/loki/' + user_agent_b64, + None, 'Fetching manager info')['manager'] + + @staticmethod + def _decrypt_url(png): + encrypted_data = io.BytesIO(compat_b64decode(png)[8:]) + while True: + length = struct.unpack('!I', encrypted_data.read(4))[0] + chunk_type = encrypted_data.read(4) + if chunk_type == b'IEND': + break + data = encrypted_data.read(length) + if chunk_type == b'tEXt': + alphabet_data, text = data.split(b'\0') + quality, url_data = text.split(b'%%') + alphabet = [] + e = 0 + d = 0 + for l in alphabet_data.decode('iso-8859-1'): + if d == 0: + alphabet.append(l) + d = e = (e + 1) % 4 + else: + d -= 1 + url = '' + f = 0 + e = 3 + b = 1 + for letter in url_data.decode('iso-8859-1'): + if f == 0: + l = int(letter) * 10 + f = 1 + else: + if e == 0: + l += int(letter) + url += alphabet[l] + e = (b + 3) % 4 + f = 0 + b += 1 + else: + e -= 1 + + yield quality.decode(), url + encrypted_data.read(4) # CRC + + def _extract_png_formats(self, video_id): + png = self._download_webpage( + 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id), + video_id, 'Downloading url 
information', query={'q': 'v2'}) + q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) + formats = [] + for quality, video_url in self._decrypt_url(png): + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, 'dash', fatal=False)) + else: + formats.append({ + 'format_id': quality, + 'quality': q(quality), + 'url': video_url, + }) + return formats + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._download_json( + 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, + video_id)['page']['items'][0] + if info['state'] == 'DESPU': + raise ExtractorError('The video is no longer available', expected=True) + title = info['title'].strip() + formats = self._extract_png_formats(video_id) + + subtitles = None + sbt_file = info.get('sbtFile') + if sbt_file: + subtitles = self.extract_subtitles(video_id, sbt_file) + + is_live = info.get('live') is True + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': info.get('image'), + 'subtitles': subtitles, + 'duration': float_or_none(info.get('duration'), 1000), + 'is_live': is_live, + 'series': info.get('programTitle'), + } + + def _get_subtitles(self, video_id, sub_file): + subs = self._download_json( + sub_file + '.json', video_id, + 'Downloading subtitles info')['page']['items'] + return dict( + (s['lang'], [{'ext': 'vtt', 'url': s['src']}]) + for s in subs) + + +class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE + IE_NAME = 'rtve.es:audio' + IE_DESC = 'RTVE audio' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)' + + _TESTS = [{ + 'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/', + 'md5': 'ae06d27bff945c4e87a50f89f6ce48ce', + 'info_dict': { + 'id': '5889192', + 'ext': 'mp3', + 'title': 'Códigos informáticos', + 'thumbnail': r're:https?://.+/1598856591583.jpg', + 'duration': 349.440, + 'series': 'A hombros de gigantes', + }, + }, { + 'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/', + 'md5': '072855ab89a9450e0ba314c717fa5ebc', + 'info_dict': { + 'id': '5791165', + 'ext': 'mp3', + 'title': 'Ignatius Farray', + 'thumbnail': r're:https?://.+/1613243011863.jpg', + 'duration': 3559.559, + 'series': 'En Radio 3' + }, + }, { + 'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/', + 'md5': '0eadab248cc8dd193fa5765712e84d5c', + 'info_dict': { + 'id': '6082623', + 'ext': 'mp3', + 'title': 'Capítulo 26 y último: La muerte de Victor', + 'thumbnail': r're:https?://.+/1632147445707.jpg', + 'duration': 3174.086, + 'series': 'Frankenstein o el moderno Prometeo' + }, + }] + + def _extract_png_formats(self, audio_id): + """ + This function retrieves media related png thumbnail which obfuscate + valuable information about the media. 
This information is decrypted + via base class _decrypt_url function providing media quality and + media url + """ + png = self._download_webpage( + 'http://www.rtve.es/ztnr/movil/thumbnail/%s/audios/%s.png' % + (self._manager, audio_id), + audio_id, 'Downloading url information', query={'q': 'v2'}) + q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) + formats = [] + for quality, audio_url in self._decrypt_url(png): + ext = determine_ext(audio_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + audio_url, audio_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + audio_url, audio_id, 'dash', fatal=False)) + else: + formats.append({ + 'format_id': quality, + 'quality': q(quality), + 'url': audio_url, + }) + return formats + + def _real_extract(self, url): + audio_id = self._match_id(url) + info = self._download_json( + 'https://www.rtve.es/api/audios/%s.json' % audio_id, + audio_id)['page']['items'][0] + + return { + 'id': audio_id, + 'title': info['title'].strip(), + 'thumbnail': info.get('thumbnail'), + 'duration': float_or_none(info.get('duration'), 1000), + 'series': try_get(info, lambda x: x['programInfo']['title']), + 'formats': self._extract_png_formats(audio_id), + } + + +class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE + IE_NAME = 'rtve.es:infantil' + IE_DESC = 'RTVE infantil' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/' + + _TESTS = [{ + 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', + 'md5': '5747454717aedf9f9fdf212d1bcfc48d', + 'info_dict': { + 'id': '3040283', + 'ext': 'mp4', + 'title': 'Maneras de vivir', + 'thumbnail': r're:https?://.+/1426182947956\.JPG', + 'duration': 357.958, + }, + 'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'], + }] + + +class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE + IE_NAME = 'rtve.es:live' + IE_DESC = 'RTVE.es live streams' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' + + _TESTS = [{ + 'url': 'http://www.rtve.es/directo/la-1/', + 'info_dict': { + 'id': 'la-1', + 'ext': 'mp4', + 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + 'skip_download': 'live stream', + } + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es') + title = remove_start(title, 'Estoy viendo ') + + vidplayer_id = self._search_regex( + (r'playerId=player([0-9]+)', + r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', + r'data-id=["\'](\d+)'), + webpage, 'internal video ID') + + return { + 'id': video_id, + 'title': title, + 'formats': self._extract_png_formats(vidplayer_id), + 'is_live': True, + } + + +class RTVETelevisionIE(InfoExtractor): + IE_NAME = 'rtve.es:television' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' + + _TEST = { + 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', + 'info_dict': { + 'id': '3069778', + 'ext': 'mp4', + 'title': 'Documentos TV - La revolución del móvil', + 'duration': 3496.948, + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, 
page_id) + + alacarta_url = self._search_regex( + r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', + webpage, 'alacarta url', default=None) + if alacarta_url is None: + raise ExtractorError( + 'The webpage doesn\'t contain any video', expected=True) + + return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py new file mode 100644 index 0000000..a84a78d --- /dev/null +++ b/yt_dlp/extractor/rtvs.py @@ -0,0 +1,85 @@ +import re + +from .common import InfoExtractor + +from ..utils import ( + parse_duration, + traverse_obj, + unified_timestamp, +) + + +class RTVSIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)' + _TESTS = [{ + # radio archive + 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872', + 'md5': '134d5d6debdeddf8a5d761cbc9edacb8', + 'info_dict': { + 'id': '414872', + 'ext': 'mp3', + 'title': 'Ostrov pokladov 1 časť.mp3', + 'duration': 2854, + 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg', + 'display_id': '135331', + } + }, { + # tv archive + 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118', + 'info_dict': { + 'id': '63118', + 'ext': 'mp4', + 'title': 'Amaro Džives - Náš deň', + 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.', + 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg', + 'timestamp': 1428555900, + 'upload_date': '20150409', + 'duration': 4986, + } + }, { + # tv archive + 'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin', + 'info_dict': { + 'id': '18083', + 'ext': 'mp4', + 'title': 'Robin', + 'description': 'md5:2f70505a7b8364491003d65ff7a0940a', + 'timestamp': 1636652760, + 'display_id': '307655', + 'duration': 831, + 'upload_date': '20211111', + 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + iframe_id = self._search_regex( + r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', webpage, 'Iframe ID') + iframe_url = self._search_regex( + fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"', webpage, 'Iframe URL') + + webpage = self._download_webpage(iframe_url, video_id, 'Downloading iframe') + json_url = self._search_regex(r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', webpage, 'json URL') + data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id) + + if data.get('clip'): + data['playlist'] = [data['clip']] + + if traverse_obj(data, ('playlist', 0, 'sources', 0, 'type')) == 'audio/mp3': + formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}] + else: + formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id) + + return { + 'id': video_id, + 'display_id': iframe_id, + 'title': traverse_obj(data, ('playlist', 0, 'title')), + 'description': traverse_obj(data, ('playlist', 0, 'description')), + 'duration': parse_duration(traverse_obj(data, ('playlist', 0, 'length'))), + 'thumbnail': traverse_obj(data, ('playlist', 0, 'image')), + 'timestamp': unified_timestamp(traverse_obj(data, ('playlist', 0, 'datetime_create'))), + 'formats': formats + } diff --git a/yt_dlp/extractor/rtvslo.py b/yt_dlp/extractor/rtvslo.py new file mode 100644 index 0000000..39ace7c --- /dev/null 
+++ b/yt_dlp/extractor/rtvslo.py @@ -0,0 +1,166 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + parse_duration, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class RTVSLOIE(InfoExtractor): + IE_NAME = 'rtvslo.si' + _VALID_URL = r'''(?x) + https?://(?: + (?:365|4d)\.rtvslo.si/arhiv/[^/?#&;]+| + (?:www\.)?rtvslo\.si/rtv365/arhiv + )/(?P<id>\d+)''' + _GEO_COUNTRIES = ['SI'] + + _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622' + SUB_LANGS_MAP = {'Slovenski': 'sl'} + + _TESTS = [ + { + 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', + 'info_dict': { + 'id': '174842550', + 'ext': 'mp4', + 'release_timestamp': 1643140032, + 'upload_date': '20220125', + 'series': 'Dnevnik', + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg', + 'description': 'md5:76a18692757aeb8f0f51221106277dd2', + 'timestamp': 1643137046, + 'title': 'Dnevnik', + 'series_id': '92', + 'release_date': '20220125', + 'duration': 1789, + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754', + 'info_dict': { + 'id': '174843754', + 'ext': 'mp4', + 'series_id': '94', + 'release_date': '20220129', + 'timestamp': 1643484455, + 'title': 'Utrip', + 'duration': 813, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg', + 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9', + 'release_timestamp': 1643485825, + 'upload_date': '20220129', + 'series': 'Utrip', + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609', + 'info_dict': { + 'id': '174844609', + 'ext': 'mp3', + 'series_id': '106615841', + 'title': 'Il giornale della sera', + 'duration': 1328, + 'series': 'Il giornale della sera', + 'timestamp': 1643743800, + 'release_timestamp': 1643745424, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg', + 'upload_date': '20220201', + 'tbr': 128000, + 'release_date': '20220201', + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750', + 'info_dict': { + 'id': '148350750', + 'ext': 'mp4', + 'title': 'Prvi šolski dan, mozaična oddaja za mlade', + 'series': 'Razred zase', + 'series_id': '148185730', + 'duration': 1481, + 'upload_date': '20121019', + 'timestamp': 1350672122, + 'release_date': '20121019', + 'release_timestamp': 1350672122, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg', + }, + }, { + 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', + 'only_matching': True + } + ] + + def _real_extract(self, url): + v_id = self._match_id(url) + meta = self._download_json(self._API_BASE.format('getRecordingDrm', v_id), v_id)['response'] + + thumbs = [{'id': k, 'url': v, 'http_headers': {'Accept': 'image/jpeg'}} + for k, v in (meta.get('images') or {}).items()] + + subs = {} + for s in traverse_obj(meta, 'subs', 'subtitles', default=[]): + lang = self.SUB_LANGS_MAP.get(s.get('language'), s.get('language') or 'und') + subs.setdefault(lang, []).append({ + 'url': s.get('file'), + 'ext': traverse_obj(s, 'format', expected_type=str.lower), + }) + + jwt = meta.get('jwt') + if not jwt: + raise ExtractorError('Site did not provide an authentication token, cannot proceed.') + + media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response'] + + formats = [] + skip_protocols = ['smil', 'f4m', 'dash'] + adaptive_url = traverse_obj(media, ('addaptiveMedia', 
'hls_sec'), expected_type=url_or_none) + if adaptive_url: + formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols) + + adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none) + if adaptive_url: + for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols): + formats.append({ + **f, + 'format_id': 'sign-' + f['format_id'], + 'format_note': 'Sign language interpretation', 'preference': -10, + 'language': ( + 'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none' + else f.get('language')) + }) + + for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))): + formats.append(traverse_obj(mediafile, { + 'url': ('streams', 'https'), + 'ext': ('mediaType', {str.lower}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'tbr': ('bitrate', {int_or_none}), + 'filesize': ('filesize', {int_or_none}), + })) + + for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))): + formats.extend(self._extract_wowza_formats( + mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols)) + + if any('intermission.mp4' in x['url'] for x in formats): + self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) + if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error': + raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True) + + return { + 'id': v_id, + 'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))), + 'title': meta.get('title'), + 'formats': formats, + 'subtitles': subs, + 'thumbnails': thumbs, + 'description': meta.get('description'), + 'timestamp': unified_timestamp(traverse_obj(meta, 'broadcastDate', ('broadcastDates', 0))), + 'release_timestamp': unified_timestamp(meta.get('recordingDate')), + 'duration': meta.get('duration') or parse_duration(meta.get('length')), + 'tags': meta.get('genre'), + 'series': meta.get('showName'), + 'series_id': meta.get('showId'), + } diff --git a/yt_dlp/extractor/rudovideo.py b/yt_dlp/extractor/rudovideo.py new file mode 100644 index 0000000..1b85955 --- /dev/null +++ b/yt_dlp/extractor/rudovideo.py @@ -0,0 +1,135 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + determine_ext, + js_to_json, + traverse_obj, + update_url_query, + url_or_none, +) + + +class RudoVideoIE(InfoExtractor): + _VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)' + _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)'] + _TESTS = [{ + 'url': 'https://rudo.video/podcast/cz2wrUy8l0o', + 'md5': '28ed82b477708dc5e12e072da2449221', + 'info_dict': { + 'id': 'cz2wrUy8l0o', + 'title': 'Diego Cabot', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/podcast/bQkt07', + 'md5': '36b22a9863de0f47f00fc7532a32a898', + 'info_dict': { + 'id': 'bQkt07', + 'title': 'Tubular Bells', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/podcast/b42ZUznHX0', + 'md5': 'b91c70d832938871367f8ad10c895821', + 'info_dict': { + 'id': 'b42ZUznHX0', + 'title': 'Columna Ruperto Concha', + 'ext': 'mp3', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/vod/bN5AaJ', + 'md5': '01324a329227e2591530ecb4f555c881', + 'info_dict': 
{ + 'id': 'bN5AaJ', + 'title': 'Ucrania 19.03', + 'creator': 'La Tercera', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/live/bbtv', + 'info_dict': { + 'id': 'bbtv', + 'ext': 'mp4', + 'creator': 'BioBioTV', + 'live_status': 'is_live', + 'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/live/c13', + 'info_dict': { + 'id': 'c13', + 'title': 'CANAL13', + 'ext': 'mp4', + }, + 'skip': 'Geo-restricted to Chile', + }, { + 'url': 'https://rudo.video/live/t13-13cl', + 'info_dict': { + 'id': 't13-13cl', + 'title': 'T13', + 'ext': 'mp4', + }, + 'skip': 'Geo-restricted to Chile', + }] + + def _real_extract(self, url): + video_id, type_ = self._match_valid_url(url).group('id', 'type') + is_live = type_ == 'live' + + webpage = self._download_webpage(url, video_id) + if 'Streaming is not available in your area' in webpage: + self.raise_geo_restricted() + + media_url = ( + self._search_regex( + r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None) + # Source URL must be used only if streamURL is unavailable + or self._search_regex( + r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None)) + if not media_url: + youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)', + webpage, 'youtube url', default=None) + if youtube_url: + return self.url_result(youtube_url, 'Youtube') + raise ExtractorError('Unable to extract stream url') + + token_array = self._search_json( + r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id, + contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json) + if token_array: + token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False) + if not token_url: + raise ExtractorError('Invalid access token array') + access_token = self._download_json( + token_url, video_id, note='Downloading access token')['data']['authToken'] + media_url = update_url_query(media_url, {'auth-token': access_token}) + + ext = determine_ext(media_url) + if ext == 'm3u8': + formats = self._extract_m3u8_formats(media_url, video_id, live=is_live) + elif ext == 'mp3': + formats = [{ + 'url': media_url, + 'vcodec': 'none', + }] + else: + formats = [{'url': media_url}] + + return { + 'id': video_id, + 'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', + webpage, 'title', default=None) + or self._og_search_title(webpage)), + 'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)', + webpage, 'videoAuthor', default=None), + 'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', + webpage, 'thumbnail', default=None) + or self._og_search_thumbnail(webpage)), + 'formats': formats, + 'is_live': is_live, + } diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py new file mode 100644 index 0000000..11095b2 --- /dev/null +++ b/yt_dlp/extractor/rule34video.py @@ -0,0 +1,123 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + get_element_by_attribute, + get_element_by_class, + get_element_html_by_class, + get_elements_by_class, + int_or_none, + parse_count, + parse_duration, + unescapeHTML, +) +from ..utils.traversal import traverse_obj + + +class Rule34VideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)' + _TESTS = [ + { + 'url': 
'https://rule34video.com/video/3065157/shot-it-mmd-hmv/', + 'md5': 'ffccac2c23799dabbd192621ae4d04f3', + 'info_dict': { + 'id': '3065157', + 'ext': 'mp4', + 'title': 'Shot It-(mmd hmv)', + 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg', + 'duration': 347.0, + 'age_limit': 18, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'timestamp': 1639872000, + 'description': 'https://discord.gg/aBqPrHSHvv', + 'upload_date': '20211219', + 'uploader': 'Sweet HMV', + 'uploader_url': 'https://rule34video.com/members/22119/', + 'categories': ['3D', 'MMD', 'iwara'], + 'tags': 'mincount:10' + } + }, + { + 'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/', + 'md5': '6bb5169f9f6b38cd70882bf2e64f6b86', + 'info_dict': { + 'id': '3065296', + 'ext': 'mp4', + 'title': 'Lara in Trouble Ep. 7 [WildeerStudio]', + 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg', + 'duration': 938.0, + 'age_limit': 18, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'timestamp': 1640131200, + 'description': '', + 'creators': ['WildeerStudio'], + 'upload_date': '20211222', + 'uploader': 'CerZule', + 'uploader_url': 'https://rule34video.com/members/36281/', + 'categories': ['3D', 'Tomb Raider'], + 'tags': 'mincount:40' + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + formats = [] + + for mobj in re.finditer(r'<a[^>]+href="(?P<video_url>[^"]+download=true[^"]+)".*>(?P<ext>[^\s]+) (?P<quality>[^<]+)p</a>', webpage): + url, ext, quality = mobj.groups() + formats.append({ + 'url': url, + 'ext': ext.lower(), + 'quality': quality, + }) + + categories, creators, uploader, uploader_url = [None] * 4 + for col in get_elements_by_class('col', webpage): + label = clean_html(get_element_by_class('label', col)) + if label == 'Categories:': + categories = list(map(clean_html, get_elements_by_class('item', col))) + elif label == 'Artist:': + creators = list(map(clean_html, get_elements_by_class('item', col))) + elif label == 'Uploaded By:': + uploader = clean_html(get_element_by_class('name', col)) + uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href') + + return { + **traverse_obj(self._search_json_ld(webpage, video_id, default={}), ({ + 'title': 'title', + 'view_count': 'view_count', + 'like_count': 'like_count', + 'duration': 'duration', + 'timestamp': 'timestamp', + 'description': 'description', + 'thumbnail': ('thumbnails', 0, 'url'), + })), + 'id': video_id, + 'formats': formats, + 'title': self._html_extract_title(webpage), + 'thumbnail': self._html_search_regex( + r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None), + 'duration': parse_duration(self._html_search_regex( + r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)), + 'view_count': int_or_none(self._html_search_regex( + r'"icon-eye"></i>\s+<span>([ \d]+)', webpage, 'views', default='').replace(' ', '')), + 'like_count': parse_count(get_element_by_class('voters count', webpage)), + 'comment_count': int_or_none(self._search_regex( + r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)), + 'age_limit': 18, + 'creators': creators, + 'uploader': uploader, + 'uploader_url': uploader_url, + 'categories': categories, + 'tags': list(map(unescapeHTML, re.findall( + r'<a 
class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))), + } diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py new file mode 100644 index 0000000..837a324 --- /dev/null +++ b/yt_dlp/extractor/rumble.py @@ -0,0 +1,390 @@ +import itertools +import re + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + UnsupportedError, + clean_html, + determine_ext, + format_field, + get_element_by_class, + int_or_none, + join_nonempty, + parse_count, + parse_iso8601, + traverse_obj, + unescapeHTML, +) + + +class RumbleEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)' + _EMBED_REGEX = [fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{_VALID_URL})'] + _TESTS = [{ + 'url': 'https://rumble.com/embed/v5pv5f', + 'md5': '36a18a049856720189f30977ccbb2c34', + 'info_dict': { + 'id': 'v5pv5f', + 'ext': 'mp4', + 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', + 'timestamp': 1571611968, + 'upload_date': '20191020', + 'channel_url': 'https://rumble.com/c/WMAR', + 'channel': 'WMAR', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg', + 'duration': 234, + 'uploader': 'WMAR', + 'live_status': 'not_live', + } + }, { + 'url': 'https://rumble.com/embed/vslb7v', + 'md5': '7418035de1a30a178b8af34dc2b6a52b', + 'info_dict': { + 'id': 'vslb7v', + 'ext': 'mp4', + 'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'', + 'timestamp': 1645142135, + 'upload_date': '20220217', + 'channel_url': 'https://rumble.com/c/CyberTechNews', + 'channel': 'CTNews', + 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', + 'duration': 901, + 'uploader': 'CTNews', + 'live_status': 'not_live', + } + }, { + 'url': 'https://rumble.com/embed/vunh1h', + 'info_dict': { + 'id': 'vunh1h', + 'ext': 'mp4', + 'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS', + 'timestamp': 1647197663, + 'upload_date': '20220313', + 'channel_url': 'https://rumble.com/user/BLCKBX', + 'channel': 'BLCKBX', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 5069, + 'uploader': 'BLCKBX', + 'live_status': 'not_live', + 'subtitles': { + 'en': [ + { + 'url': r're:https://.+\.vtt', + 'name': 'English', + 'ext': 'vtt' + } + ] + }, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rumble.com/embed/v1essrt', + 'info_dict': { + 'id': 'v1essrt', + 'ext': 'mp4', + 'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to', + 'timestamp': 1661519399, + 'upload_date': '20220826', + 'channel_url': 'https://rumble.com/c/LofiGirl', + 'channel': 'Lofi Girl', + 'thumbnail': r're:https://.+\.jpg', + 'uploader': 'Lofi Girl', + 'live_status': 'is_live', + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rumble.com/embed/v1amumr', + 'info_dict': { + 'id': 'v1amumr', + 'ext': 'mp4', + 'fps': 60, + 'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live', + 'timestamp': 1658518457, + 'upload_date': '20220722', + 'channel_url': 'https://rumble.com/c/RumbleEvents', + 'channel': 'Rumble Events', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 16427, + 'uploader': 'Rumble Events', + 'live_status': 'was_live', + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', + 'only_matching': True, + }] + + _WEBPAGE_TESTS = [ + { + 'note': 'Rumble JS embed', + 'url': 
'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', + 'md5': '4701209ac99095592e73dbba21889690', + 'info_dict': { + 'id': 'v15eqxl', + 'ext': 'mp4', + 'channel': 'Mr Producer Media', + 'duration': 92, + 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh', + 'channel_url': 'https://rumble.com/c/RichSementa', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg', + 'timestamp': 1654892716, + 'uploader': 'Mr Producer Media', + 'upload_date': '20220610', + 'live_status': 'not_live', + } + }, + ] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + embeds = tuple(super()._extract_embed_urls(url, webpage)) + if embeds: + return embeds + return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer( + r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + video = self._download_json( + 'https://rumble.com/embedJS/u3/', video_id, + query={'request': 'video', 'ver': 2, 'v': video_id}) + + sys_msg = traverse_obj(video, ('sys', 'msg')) + if sys_msg: + self.report_warning(sys_msg, video_id=video_id) + + if video.get('live') == 0: + live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live' + elif video.get('live') == 1: + live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live' + elif video.get('live') == 2: + live_status = 'is_live' + else: + live_status = None + + formats = [] + for ext, ext_info in (video.get('ua') or {}).items(): + if isinstance(ext_info, dict): + for height, video_info in ext_info.items(): + if not traverse_obj(video_info, ('meta', 'h', {int_or_none})): + video_info.setdefault('meta', {})['h'] = height + ext_info = ext_info.values() + + for video_info in ext_info: + meta = video_info.get('meta') or {} + if not video_info.get('url'): + continue + if ext == 'hls': + if meta.get('live') is True and video.get('live') == 1: + live_status = 'post_live' + formats.extend(self._extract_m3u8_formats( + video_info['url'], video_id, + ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live')) + continue + timeline = ext == 'timeline' + if timeline: + ext = determine_ext(video_info['url']) + formats.append({ + 'ext': ext, + 'acodec': 'none' if timeline else None, + 'url': video_info['url'], + 'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')), + 'format_note': 'Timeline' if timeline else None, + 'fps': None if timeline else video.get('fps'), + **traverse_obj(meta, { + 'tbr': 'bitrate', + 'filesize': 'size', + 'width': 'w', + 'height': 'h', + }, expected_type=lambda x: int(x) or None) + }) + + subtitles = { + lang: [{ + 'url': sub_info['path'], + 'name': sub_info.get('language') or '', + }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path') + } + + author = video.get('author') or {} + thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'})) + if not thumbnails and video.get('i'): + thumbnails = [{'url': video['i']}] + + if live_status in {'is_live', 'post_live'}: + duration = None + else: + duration = int_or_none(video.get('duration')) + + return { + 'id': video_id, + 'title': unescapeHTML(video.get('title')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + 'timestamp': parse_iso8601(video.get('pubDate')), + 'channel': 
author.get('name'), + 'channel_url': author.get('url'), + 'duration': duration, + 'uploader': author.get('name'), + 'live_status': live_status, + } + + +class RumbleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$' + _EMBED_REGEX = [ + r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>', + r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>'] + _TESTS = [{ + 'add_ie': ['RumbleEmbed'], + 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', + 'md5': '53af34098a7f92c4e51cf0bd1c33f009', + 'info_dict': { + 'id': 'vb0ofn', + 'ext': 'mp4', + 'timestamp': 1612662578, + 'uploader': 'LovingMontana', + 'channel': 'LovingMontana', + 'upload_date': '20210207', + 'title': 'Winter-loving dog helps girls dig a snow fort ', + 'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!', + 'channel_url': 'https://rumble.com/c/c-546523', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 103, + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + 'live_status': 'not_live', + } + }, { + 'url': 'http://www.rumble.com/vDMUM1?key=value', + 'only_matching': True, + }, { + 'note': 'timeline format', + 'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html', + 'md5': '40d61fec6c0945bca3d0e1dc1aa53d79', + 'params': {'format': 'wv'}, + 'info_dict': { + 'id': 'v2bou5f', + 'ext': 'mp4', + 'uploader': 'Redacted News', + 'upload_date': '20230322', + 'timestamp': 1679445010, + 'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris', + 'duration': 892, + 'channel': 'Redacted News', + 'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42', + 'channel_url': 'https://rumble.com/c/Redacted', + 'live_status': 'not_live', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg', + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + }, + }, { + 'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html', + 'info_dict': { + 'id': 'v2blzyy', + 'ext': 'mp4', + 'live_status': 'was_live', + 'release_timestamp': 1679446804, + 'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636', + 'release_date': '20230322', + 'timestamp': 1679445692, + 'duration': 4435, + 'upload_date': '20230322', + 'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi', + 'uploader': 'Kim Iversen', + 'channel_url': 'https://rumble.com/c/KimIversen', + 'channel': 'Kim Iversen', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg', + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + }, + }] + + _WEBPAGE_TESTS = [{ + 'url': 'https://rumble.com/videos?page=2', + 'playlist_mincount': 24, + 'info_dict': { + 'id': 'videos?page=2', + 'title': 'All videos', + 'description': 'Browse videos uploaded to Rumble.com', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/browse/live', + 'playlist_mincount': 25, + 'info_dict': { + 'id': 'live', + 'title': 'Browse', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/search/video?q=rumble&sort=views', + 'playlist_mincount': 24, + 'info_dict': { + 'id': 'video?q=rumble&sort=views', + 'title': 'Search results for: rumble', + 'age_limit': 0, + }, + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = 
self._download_webpage(url, page_id) + url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None) + if not url_info: + raise UnsupportedError(url) + + return { + '_type': 'url_transparent', + 'ie_key': url_info['ie_key'], + 'url': url_info['url'], + 'release_timestamp': parse_iso8601(self._search_regex( + r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)), + 'view_count': int_or_none(self._search_regex( + r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)), + 'like_count': parse_count(self._search_regex( + r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)), + 'dislike_count': parse_count(self._search_regex( + r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)), + 'description': clean_html(get_element_by_class('media-description', webpage)) + } + + +class RumbleChannelIE(InfoExtractor): + _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))' + + _TESTS = [{ + 'url': 'https://rumble.com/c/Styxhexenhammer666', + 'playlist_mincount': 1160, + 'info_dict': { + 'id': 'Styxhexenhammer666', + }, + }, { + 'url': 'https://rumble.com/user/goldenpoodleharleyeuna', + 'playlist_mincount': 4, + 'info_dict': { + 'id': 'goldenpoodleharleyeuna', + }, + }] + + def entries(self, url, playlist_id): + for page in itertools.count(1): + try: + webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 404: + break + raise + for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage): + yield self.url_result('https://rumble.com' + video_url) + + def _real_extract(self, url): + url, playlist_id = self._match_valid_url(url).groups() + return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id) diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py new file mode 100644 index 0000000..287824d --- /dev/null +++ b/yt_dlp/extractor/rutube.py @@ -0,0 +1,365 @@ +import itertools + +from .common import InfoExtractor +from ..compat import ( + compat_str, +) +from ..utils import ( + determine_ext, + bool_or_none, + int_or_none, + parse_qs, + try_get, + unified_timestamp, + url_or_none, +) + + +class RutubeBaseIE(InfoExtractor): + def _download_api_info(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/video/%s/' % video_id, + video_id, 'Downloading video JSON', + 'Unable to download video JSON', query=query) + + def _extract_info(self, video, video_id=None, require_title=True): + title = video['title'] if require_title else video.get('title') + + age_limit = video.get('is_adult') + if age_limit is not None: + age_limit = 18 if age_limit is True else 0 + + uploader_id = try_get(video, lambda x: x['author']['id']) + category = try_get(video, lambda x: x['category']['name']) + description = video.get('description') + duration = int_or_none(video.get('duration')) + + return { + 'id': video.get('id') or video_id if video_id else video['id'], + 'title': title, + 'description': description, + 'thumbnail': video.get('thumbnail_url'), + 'duration': duration, + 'uploader': try_get(video, lambda x: x['author']['name']), + 'uploader_id': compat_str(uploader_id) if uploader_id else None, + 'timestamp': 
unified_timestamp(video.get('created_ts')), + 'categories': [category] if category else None, + 'age_limit': age_limit, + 'view_count': int_or_none(video.get('hits')), + 'comment_count': int_or_none(video.get('comments_count')), + 'is_live': bool_or_none(video.get('is_livestream')), + 'chapters': self._extract_chapters_from_description(description, duration), + } + + def _download_and_extract_info(self, video_id, query=None): + return self._extract_info( + self._download_api_info(video_id, query=query), video_id) + + def _download_api_options(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/play/options/%s/' % video_id, + video_id, 'Downloading options JSON', + 'Unable to download options JSON', + headers=self.geo_verification_headers(), query=query) + + def _extract_formats(self, options, video_id): + formats = [] + for format_id, format_url in options['video_balancer'].items(): + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + return formats + + def _download_and_extract_formats(self, video_id, query=None): + return self._extract_formats( + self._download_api_options(video_id, query=query), video_id) + + +class RutubeIE(RutubeBaseIE): + IE_NAME = 'rutube' + IE_DESC = 'Rutube videos' + _VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})' + _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] + + _TESTS = [{ + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'md5': 'e33ac625efca66aba86cbec9851f2692', + 'info_dict': { + 'id': '3eac3b4561676c17df9132a9a1e62e3e', + 'ext': 'mp4', + 'title': 'Раненный кенгуру забежал в аптеку', + 'description': 'http://www.ntdtv.ru ', + 'duration': 81, + 'uploader': 'NTDRussian', + 'uploader_id': '29790', + 'timestamp': 1381943602, + 'upload_date': '20131016', + 'age_limit': 0, + 'view_count': int, + 'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', + 'categories': ['Новости и СМИ'], + 'chapters': [], + }, + 'expected_warnings': ['Unable to download f4m'], + }, { + 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', + 'only_matching': True, + }, { + 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', + 'only_matching': True, + }, { + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', + 'only_matching': True, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', + 'only_matching': True, + }, { + 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', + 'md5': 'd106225f15d625538fe22971158e896f', + 'info_dict': { + 'id': '884fb55f07a97ab673c7d654553e0f48', + 'ext': 'mp4', + 'title': 'Яцуноками, Nioh2', + 'description': 'Nioh2: финал сражения с боссом Яцуноками', + 'duration': 15, + 'uploader': 'mexus', + 'uploader_id': '24222106', + 'timestamp': 1670646232, + 'upload_date': '20221210', + 'age_limit': 0, + 'view_count': int, + 'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', + 'categories': ['Видеоигры'], + 'chapters': [], + }, + 
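# the f4m variant of the balancer URL can fail to download (fetched with fatal=False), hence the expected warning +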
'expected_warnings': ['Unable to download f4m'], + }, { + 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/', + 'info_dict': { + 'id': 'c65b465ad0c98c89f3b25cb03dcc87c6', + 'ext': 'mp4', + 'chapters': 'count:4', + 'categories': ['Бизнес и предпринимательство'], + 'description': 'md5:252feac1305257d8c1bab215cedde75d', + 'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', + 'duration': 782, + 'age_limit': 0, + 'uploader_id': '23491359', + 'timestamp': 1677153329, + 'view_count': int, + 'upload_date': '20230223', + 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании', + 'uploader': 'Стас Быков', + }, + 'expected_warnings': ['Unable to download f4m'], + }] + + @classmethod + def suitable(cls, url): + return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url) + + def _real_extract(self, url): + video_id = self._match_id(url) + query = parse_qs(url) + info = self._download_and_extract_info(video_id, query) + info['formats'] = self._download_and_extract_formats(video_id, query) + return info + + +class RutubeEmbedIE(RutubeBaseIE): + IE_NAME = 'rutube:embed' + IE_DESC = 'Rutube embedded videos' + _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' + + _TESTS = [{ + 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'info_dict': { + 'id': 'a10e53b86e8f349080f718582ce4c661', + 'ext': 'mp4', + 'timestamp': 1387830582, + 'upload_date': '20131223', + 'uploader_id': '297833', + 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', + 'uploader': 'subziro89 ILya', + 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://rutube.ru/play/embed/8083783', + 'only_matching': True, + }, { + # private video + 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', + 'only_matching': True, + }] + + def _real_extract(self, url): + embed_id = self._match_id(url) + # Query may contain private videos token and should be passed to API + # requests (see #19163) + query = parse_qs(url) + options = self._download_api_options(embed_id, query) + video_id = options['effective_video'] + formats = self._extract_formats(options, video_id) + info = self._download_and_extract_info(video_id, query) + info.update({ + 'extractor_key': 'Rutube', + 'formats': formats, + }) + return info + + +class RutubePlaylistBaseIE(RutubeBaseIE): + def _next_page_url(self, page_num, playlist_id, *args, **kwargs): + return self._PAGE_TEMPLATE % (playlist_id, page_num) + + def _entries(self, playlist_id, *args, **kwargs): + next_page_url = None + for pagenum in itertools.count(1): + page = self._download_json( + next_page_url or self._next_page_url( + pagenum, playlist_id, *args, **kwargs), + playlist_id, 'Downloading page %s' % pagenum) + + results = page.get('results') + if not results or not isinstance(results, list): + break + + for result in results: + video_url = url_or_none(result.get('video_url')) + if not video_url: + continue + entry = self._extract_info(result, require_title=False) + entry.update({ + '_type': 'url', + 'url': video_url, + 'ie_key': RutubeIE.ie_key(), + }) + yield entry + + next_page_url = page.get('next') + if not next_page_url or not page.get('has_next'): + break + + def _extract_playlist(self, playlist_id, *args, **kwargs): + return self.playlist_result( 
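+            # _entries pages through the API lazily, stopping when has_next is false or a page returns no results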
+ self._entries(playlist_id, *args, **kwargs), + playlist_id, kwargs.get('playlist_name')) + + def _real_extract(self, url): + return self._extract_playlist(self._match_id(url)) + + +class RutubeTagsIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:tags' + IE_DESC = 'Rutube tags' + _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://rutube.ru/tags/video/1800/', + 'info_dict': { + 'id': '1800', + }, + 'playlist_mincount': 68, + }] + + _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + + +class RutubeMovieIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:movie' + IE_DESC = 'Rutube movies' + _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' + + _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' + _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + + def _real_extract(self, url): + movie_id = self._match_id(url) + movie = self._download_json( + self._MOVIE_TEMPLATE % movie_id, movie_id, + 'Downloading movie JSON') + return self._extract_playlist( + movie_id, playlist_name=movie.get('name')) + + +class RutubePersonIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:person' + IE_DESC = 'Rutube person videos' + _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://rutube.ru/video/person/313878/', + 'info_dict': { + 'id': '313878', + }, + 'playlist_mincount': 37, + }] + + _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + + +class RutubePlaylistIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:playlist' + IE_DESC = 'Rutube playlists' + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', + 'info_dict': { + 'id': '3097', + }, + 'playlist_count': 27, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'only_matching': True, + }] + + _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' + + @classmethod + def suitable(cls, url): + from ..utils import int_or_none, parse_qs + + if not super(RutubePlaylistIE, cls).suitable(url): + return False + params = parse_qs(url) + return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) + + def _next_page_url(self, page_num, playlist_id, item_kind): + return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) + + def _real_extract(self, url): + qs = parse_qs(url) + playlist_kind = qs['pl_type'][0] + playlist_id = qs['pl_id'][0] + return self._extract_playlist(playlist_id, item_kind=playlist_kind) + + +class RutubeChannelIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:channel' + IE_DESC = 'Rutube channel' + _VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos' + _TESTS = [{ + 'url': 'https://rutube.ru/channel/639184/videos/', + 'info_dict': { + 'id': '639184', + }, + 'playlist_mincount': 133, + }] + + _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py new file mode 100644 index 0000000..d7f9a73 --- /dev/null +++ b/yt_dlp/extractor/rutv.py @@ -0,0 +1,203 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + str_to_int +) + + +class RUTVIE(InfoExtractor): + IE_DESC = 'RUTV.RU' + _VALID_URL = r'''(?x) + https?:// + (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/ + (?P<path> + 
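# the path distinguishes the legacy flash container, the iframe player and live casts +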
flash\d+v/container\.swf\?id=| + iframe/(?P<type>swf|video|live)/id/| + index/iframe/cast_id/ + ) + (?P<id>\d+) + ''' + _EMBED_URLS = [ + r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', + r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', + ] + + _TESTS = [ + { + 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', + 'info_dict': { + 'id': '774471', + 'ext': 'mp4', + 'title': 'Монологи на все времена', + 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', + 'duration': 2906, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', + 'info_dict': { + 'id': '774016', + 'ext': 'mp4', + 'title': 'Чужой в семье Сталина', + 'description': '', + 'duration': 2539, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', + 'info_dict': { + 'id': '766888', + 'ext': 'mp4', + 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', + 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', + 'duration': 279, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', + 'info_dict': { + 'id': '771852', + 'ext': 'mp4', + 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', + 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', + 'duration': 3096, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', + 'info_dict': { + 'id': '51499', + 'ext': 'flv', + 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', + 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', + }, + 'skip': 'Translation has finished', + }, + { + 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/', + 'info_dict': { + 'id': '21', + 'ext': 'mp4', + 'title': 're:^Россия 24. 
Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + video_path = mobj.group('path') + + if re.match(r'flash\d+v', video_path): + video_type = 'video' + elif video_path.startswith('iframe'): + video_type = mobj.group('type') + if video_type == 'swf': + video_type = 'video' + elif video_path.startswith('index/iframe/cast_id'): + video_type = 'live' + + is_live = video_type == 'live' + + json_data = self._download_json( + 'http://player.vgtrk.com/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id), + video_id, 'Downloading JSON') + + if json_data['errors']: + raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) + + playlist = json_data['data']['playlist'] + medialist = playlist['medialist'] + media = medialist[0] + + if media['errors']: + raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) + + view_count = int_or_none(playlist.get('count_views')) + priority_transport = playlist['priority_transport'] + + thumbnail = media['picture'] + width = int_or_none(media['width']) + height = int_or_none(media['height']) + description = media['anons'] + title = media['title'] + duration = int_or_none(media.get('duration')) + + formats = [] + subtitles = {} + + for transport, links in media['sources'].items(): + for quality, url in links.items(): + preference = -1 if priority_transport == transport else -2 + if transport == 'rtmp': + mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) + if not mobj: + continue + fmt = { + 'url': mobj.group('url'), + 'play_path': mobj.group('playpath'), + 'app': mobj.group('app'), + 'page_url': 'http://player.rutv.ru', + 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22', + 'rtmp_live': True, + 'ext': 'flv', + 'vbr': str_to_int(quality), + } + elif transport == 'm3u8': + fmt, subs = self._extract_m3u8_formats_and_subtitles( + url, video_id, 'mp4', quality=preference, m3u8_id='hls') + formats.extend(fmt) + self._merge_subtitles(subs, target=subtitles) + continue + else: + fmt = { + 'url': url + } + fmt.update({ + 'width': int_or_none(quality, default=height, invscale=width, scale=height), + 'height': int_or_none(quality, default=height), + 'format_id': '%s-%s' % (transport, quality), + 'source_preference': preference, + }) + formats.append(fmt) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'view_count': view_count, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + '_format_sort_fields': ('source', ), + } diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py new file mode 100644 index 0000000..33f6652 --- /dev/null +++ b/yt_dlp/extractor/ruutu.py @@ -0,0 +1,262 @@ +import json +import re + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +from ..utils import ( + determine_ext, + ExtractorError, + find_xpath_attr, + int_or_none, + traverse_obj, + try_call, + unified_strdate, + url_or_none, + xpath_attr, + xpath_text, +) + + +class RuutuIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/| + 
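# the embed player variant carries the video id in its nid query parameter +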
static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid= + ) + (?P<id>\d+) + ''' + _TESTS = [ + { + 'url': 'http://www.ruutu.fi/video/2058907', + 'md5': 'ab2093f39be1ca8581963451b3c0234f', + 'info_dict': { + 'id': '2058907', + 'ext': 'mp4', + 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', + 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 114, + 'age_limit': 0, + 'upload_date': '20150508', + }, + }, + { + 'url': 'http://www.ruutu.fi/video/2057306', + 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', + 'info_dict': { + 'id': '2057306', + 'ext': 'mp4', + 'title': 'Superpesis: katso koko kausi Ruudussa', + 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 40, + 'age_limit': 0, + 'upload_date': '20150507', + 'series': 'Superpesis', + 'categories': ['Urheilu'], + }, + }, + { + 'url': 'http://www.supla.fi/supla/2231370', + 'md5': 'df14e782d49a2c0df03d3be2a54ef949', + 'info_dict': { + 'id': '2231370', + 'ext': 'mp4', + 'title': 'Osa 1: Mikael Jungner', + 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + 'upload_date': '20151012', + 'series': 'Läpivalaisu', + }, + }, + # Episode where <SourceFile> is "NOT-USED", but has other + # downloadable sources available. + { + 'url': 'http://www.ruutu.fi/video/3193728', + 'only_matching': True, + }, + { + # audio podcast + 'url': 'https://www.supla.fi/supla/3382410', + 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', + 'info_dict': { + 'id': '3382410', + 'ext': 'mp3', + 'title': 'Mikä ihmeen poltergeist?', + 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + 'upload_date': '20190320', + 'series': 'Mysteeritarinat', + 'duration': 1324, + }, + 'expected_warnings': [ + 'HTTP Error 502: Bad Gateway', + 'Failed to download m3u8 information', + ], + }, + { + 'url': 'http://www.supla.fi/audio/2231370', + 'only_matching': True, + }, + { + 'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790', + 'only_matching': True, + }, + { + # episode + 'url': 'https://www.ruutu.fi/video/3401964', + 'info_dict': { + 'id': '3401964', + 'ext': 'mp4', + 'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17', + 'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2582, + 'age_limit': 12, + 'upload_date': '20190508', + 'series': 'Temptation Island Suomi', + 'season_number': 5, + 'episode_number': 17, + 'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'], + }, + 'params': { + 'skip_download': True, + }, + }, + { + # premium + 'url': 'https://www.ruutu.fi/video/3618715', + 'only_matching': True, + }, + ] + _API_BASE = 'https://gatling.nelonenmedia.fi' + + @classmethod + def _extract_embed_urls(cls, url, webpage): + # nelonen.fi + settings = try_call( + lambda: json.loads(re.search( + r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False)) + if settings: + video_id = traverse_obj(settings, ( + 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] + # hs.fi and is.fi + settings = try_call( + lambda: json.loads(re.search( + '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', + webpage).group(1), strict=False)) + if settings: + 
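# hs.fi and is.fi articles may embed several clips via splitBody, so collect every sourceId +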
video_ids = set(traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or []) + if video_ids: + return [f'http://www.ruutu.fi/video/{v}' for v in video_ids] + video_id = traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_xml = self._download_xml( + '%s/media-xml-cache' % self._API_BASE, video_id, + query={'id': video_id}) + + formats = [] + processed_urls = [] + + def extract_formats(node): + for child in node: + if child.tag.endswith('Files'): + extract_formats(child) + elif child.tag.endswith('File'): + video_url = child.text + if (not video_url or video_url in processed_urls + or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))): + continue + processed_urls.append(video_url) + ext = determine_ext(video_url) + auth_video_url = url_or_none(self._download_webpage( + '%s/auth/access/v2' % self._API_BASE, video_id, + note='Downloading authenticated %s stream URL' % ext, + fatal=False, query={'stream': video_url})) + if auth_video_url: + processed_urls.append(auth_video_url) + video_url = auth_video_url + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds', fatal=False)) + elif ext == 'mpd': + # video-only and audio-only streams are of different + # duration resulting in out of sync issue + continue + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp3' or child.tag == 'AudioMediaFile': + formats.append({ + 'format_id': 'audio', + 'url': video_url, + 'vcodec': 'none', + }) + else: + proto = compat_urllib_parse_urlparse(video_url).scheme + if not child.tag.startswith('HTTP') and proto != 'rtmp': + continue + preference = -1 if proto == 'rtmp' else 1 + label = child.get('label') + tbr = int_or_none(child.get('bitrate')) + format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto + if not self._is_valid_url(video_url, video_id, format_id): + continue + width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'width': width, + 'height': height, + 'tbr': tbr, + 'preference': preference, + }) + + extract_formats(video_xml.find('./Clip')) + + def pv(name): + value = try_call(lambda: find_xpath_attr( + video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value')) + if value != 'NA': + return value or None + + if not formats: + if (not self.get_param('allow_unplayable_formats') + and xpath_text(video_xml, './Clip/DRM', default=None)): + self.report_drm(video_id) + ns_st_cds = pv('ns_st_cds') + if ns_st_cds != 'free': + raise ExtractorError('This video is %s.' 
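# ns_st_cds names the access tier; anything other than 'free' is reported as unplayable +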
% ns_st_cds, expected=True) + + themes = pv('themes') + + return { + 'id': video_id, + 'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True), + 'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'), + 'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'), + 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')), + 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), + 'upload_date': unified_strdate(pv('date_start')), + 'series': pv('series_name'), + 'season_number': int_or_none(pv('season_number')), + 'episode_number': int_or_none(pv('episode_number')), + 'categories': themes.split(',') if themes else None, + 'formats': formats, + } diff --git a/yt_dlp/extractor/ruv.py b/yt_dlp/extractor/ruv.py new file mode 100644 index 0000000..12499d6 --- /dev/null +++ b/yt_dlp/extractor/ruv.py @@ -0,0 +1,186 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + parse_duration, + traverse_obj, + unified_timestamp, +) + + +class RuvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)' + _TESTS = [{ + # m3u8 + 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', + 'md5': '66347652f4e13e71936817102acc1724', + 'info_dict': { + 'id': '1144499', + 'display_id': 'fh-valur/20170516', + 'ext': 'mp4', + 'title': 'FH - Valur', + 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', + 'timestamp': 1494963600, + 'upload_date': '20170516', + }, + }, { + # mp3 + 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', + 'md5': '395ea250c8a13e5fdb39d4670ef85378', + 'info_dict': { + 'id': '1153630', + 'display_id': 'morgunutvarpid/20170619', + 'ext': 'mp3', + 'title': 'Morgunútvarpið', + 'description': 'md5:a4cf1202c0a1645ca096b06525915418', + 'timestamp': 1497855000, + 'upload_date': '20170619', + }, + }, { + 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', + 'only_matching': True, + }, { + 'url': 'http://www.ruv.is/node/1151854', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + title = self._og_search_title(webpage) + + FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1' + + media_url = self._html_search_regex( + FIELD_RE % 'src', webpage, 'video URL', group='url') + + video_id = self._search_regex( + r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)', + webpage, 'video id', default=display_id) + + ext = determine_ext(media_url) + + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + elif ext == 'mp3': + formats = [{ + 'format_id': 'mp3', + 'url': media_url, + 'vcodec': 'none', + }] + else: + formats = [{ + 'url': media_url, + }] + + description = self._og_search_description(webpage, default=None) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._search_regex( + FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False) + timestamp = unified_timestamp(self._html_search_meta( + 'article:published_time', webpage, 'timestamp', fatal=False)) + + return { + 'id': video_id, + 
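# the numeric node id when the page exposes one, otherwise the URL slug +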
'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'formats': formats, + } + + +class RuvSpilaIE(InfoExtractor): + IE_NAME = 'ruv.is:spila' + _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:(?:sjon|ut)varp|(?:krakka|ung)ruv)/spila/.+/(?P<series_id>[0-9]+)/(?P<id>[a-z0-9]+)' + _TESTS = [{ + 'url': 'https://www.ruv.is/sjonvarp/spila/ithrottir/30657/9jcnd4', + 'info_dict': { + 'id': '9jcnd4', + 'ext': 'mp4', + 'title': '01.02.2022', + 'chapters': 'count:4', + 'timestamp': 1643743500, + 'upload_date': '20220201', + 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/94boog-iti3jg.jpg', + 'description': 'Íþróttafréttir.', + 'age_limit': 0, + }, + }, { + 'url': 'https://www.ruv.is/utvarp/spila/i-ljosi-sogunnar/23795/7hqkre', + 'info_dict': { + 'id': '7hqkre', + 'ext': 'mp3', + 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/7hqkre-7uepao.jpg', + 'description': 'md5:8d7046549daff35e9a3190dc9901a120', + 'chapters': [], + 'upload_date': '20220204', + 'timestamp': 1643965500, + 'title': 'Nellie Bly II', + 'age_limit': 0, + }, + }, { + 'url': 'https://www.ruv.is/ungruv/spila/ungruv/28046/8beuph', + 'only_matching': True + }, { + 'url': 'https://www.ruv.is/krakkaruv/spila/krakkafrettir/30712/9jbgb0', + 'only_matching': True + }] + + def _real_extract(self, url): + display_id, series_id = self._match_valid_url(url).group('id', 'series_id') + program = self._download_json( + 'https://www.ruv.is/gql/', display_id, query={'query': '''{ + Program(id: %s){ + title image description short_description + episodes(id: {value: "%s"}) { + rating title duration file image firstrun description + clips { + time text + } + subtitles { + name value + } + } + } + }''' % (series_id, display_id)})['data']['Program'] + episode = program['episodes'][0] + + subs = {} + for trk in episode.get('subtitles'): + if trk.get('name') and trk.get('value'): + subs.setdefault(trk['name'], []).append({'url': trk['value'], 'ext': 'vtt'}) + + media_url = episode['file'] + if determine_ext(media_url) == 'm3u8': + formats = self._extract_m3u8_formats(media_url, display_id) + else: + formats = [{'url': media_url}] + + clips = [ + {'start_time': parse_duration(c.get('time')), 'title': c.get('text')} + for c in episode.get('clips') or []] + + return { + 'id': display_id, + 'title': traverse_obj(program, ('episodes', 0, 'title'), 'title'), + 'description': traverse_obj( + program, ('episodes', 0, 'description'), 'description', 'short_description', + expected_type=lambda x: x or None), + 'subtitles': subs, + 'thumbnail': episode.get('image', '').replace('$$IMAGESIZE$$', '1960') or None, + 'timestamp': unified_timestamp(episode.get('firstrun')), + 'formats': formats, + 'age_limit': episode.get('rating'), + 'chapters': clips + } diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py new file mode 100644 index 0000000..67eff72 --- /dev/null +++ b/yt_dlp/extractor/s4c.py @@ -0,0 +1,103 @@ +from .common import InfoExtractor +from ..utils import traverse_obj, url_or_none + + +class S4CIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.s4c.cymru/clic/programme/861362209', + 'info_dict': { + 'id': '861362209', + 'ext': 'mp4', + 'title': 'Y Swn', + 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', + 'duration': 5340, + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg' + }, + 
}, { + 'url': 'https://www.s4c.cymru/clic/programme/856636948', + 'info_dict': { + 'id': '856636948', + 'ext': 'mp4', + 'title': 'Am Dro', + 'duration': 2880, + 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg' + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + details = self._download_json( + f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}', + video_id, fatal=False) + + player_config = self._download_json( + 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ + 'programme_id': video_id, + 'signed': '0', + 'lang': 'en', + 'mode': 'od', + 'appId': 'clic', + 'streamName': '', + }, note='Downloading player config JSON') + subtitles = {} + for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))): + subtitles.setdefault(sub.get('3', 'en'), []).append({ + 'url': sub['0'], + 'name': sub.get('1'), + }) + m3u8_url = self._download_json( + 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ + 'mode': 'od', + 'application': 'clic', + 'region': 'WW', + 'extra': 'false', + 'thirdParty': 'false', + 'filename': player_config['filename'], + }, note='Downloading streaming urls JSON')['hls'] + + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'), + 'subtitles': subtitles, + 'thumbnail': url_or_none(player_config.get('poster')), + **traverse_obj(details, ('full_prog_details', 0, { + 'title': (('programme_title', 'series_title'), {str}), + 'description': ('full_billing', {str.strip}), + 'duration': ('duration', {lambda x: int(x) * 60}), + }), get_all=False), + } + + +class S4CSeriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.s4c.cymru/clic/series/864982911', + 'playlist_mincount': 6, + 'info_dict': { + 'id': '864982911', + 'title': 'Iaith ar Daith', + }, + }, { + 'url': 'https://www.s4c.cymru/clic/series/866852587', + 'playlist_mincount': 8, + 'info_dict': { + 'id': '866852587', + 'title': 'FFIT Cymru', + }, + }] + + def _real_extract(self, url): + series_id = self._match_id(url) + series_details = self._download_json( + 'https://www.s4c.cymru/df/series_details', series_id, query={ + 'lang': 'e', + 'series_id': series_id, + 'show_prog_in_series': 'Y' + }, note='Downloading series details JSON') + + return self.playlist_result( + [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id) + for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))], + series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str}))) diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py new file mode 100644 index 0000000..8d322d7 --- /dev/null +++ b/yt_dlp/extractor/safari.py @@ -0,0 +1,259 @@ +import json +import re + +from .common import InfoExtractor + +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + update_url_query, +) + + +class SafariBaseIE(InfoExtractor): + _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/' + _NETRC_MACHINE = 'safari' + + _API_BASE = 'https://learning.oreilly.com/api/v1' + _API_FORMAT = 'json' + + LOGGED_IN = False + + def _perform_login(self, username, password): + _, urlh = self._download_webpage_handle( + 'https://learning.oreilly.com/accounts/login-check/', None, + 'Downloading 
login page') + + def is_logged(urlh): + return 'learning.oreilly.com/home/' in urlh.url + + if is_logged(urlh): + self.LOGGED_IN = True + return + + redirect_url = urlh.url + parsed_url = compat_urlparse.urlparse(redirect_url) + qs = compat_parse_qs(parsed_url.query) + next_uri = compat_urlparse.urljoin( + 'https://api.oreilly.com', qs['next'][0]) + + auth, urlh = self._download_json_handle( + 'https://www.oreilly.com/member/auth/login/', None, 'Logging in', + data=json.dumps({ + 'email': username, + 'password': password, + 'redirect_uri': next_uri, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Referer': redirect_url, + }, expected_status=400) + + credentials = auth.get('credentials') + if (not auth.get('logged_in') and not auth.get('redirect_uri') + and credentials): + raise ExtractorError( + 'Unable to login: %s' % credentials, expected=True) + + # oreilly serves two same instances of the following cookies + # in Set-Cookie header and expects first one to be actually set + for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'): + self._apply_first_set_cookie_header(urlh, cookie) + + _, urlh = self._download_webpage_handle( + auth.get('redirect_uri') or next_uri, None, 'Completing login',) + + if is_logged(urlh): + self.LOGGED_IN = True + return + + raise ExtractorError('Unable to log in') + + +class SafariIE(SafariBaseIE): + IE_NAME = 'safari' + IE_DESC = 'safaribooksonline.com online video' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ + (?: + library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html| + videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+) + ) + ''' + + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', + 'md5': 'dcc5a425e79f2564148652616af1f2a3', + 'info_dict': { + 'id': '0_qbqx90ic', + 'ext': 'mp4', + 'title': 'Introduction to Hadoop Fundamentals LiveLessons', + 'timestamp': 1437758058, + 'upload_date': '20150724', + 'uploader_id': 'stork', + }, + }, { + # non-digits in course id + 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', + 'only_matching': True, + }, { + 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', + 'only_matching': True, + }, { + 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html', + 'only_matching': True, + }] + + _PARTNER_ID = '1926081' + _UICONF_ID = '29375172' + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + + reference_id = mobj.group('reference_id') + if reference_id: + video_id = reference_id + partner_id = self._PARTNER_ID + ui_id = self._UICONF_ID + else: + video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part')) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + mobj = re.match(self._VALID_URL, urlh.url) + reference_id = mobj.group('reference_id') + if not reference_id: + reference_id = self._search_regex( + r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura reference id', group='id') + partner_id = self._search_regex( + 
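# fall back to the hardcoded Kaltura partner/uiconf ids when the page omits them +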
r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura widget id', default=self._PARTNER_ID, + group='id') + ui_id = self._search_regex( + r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura uiconf id', default=self._UICONF_ID, + group='id') + + query = { + 'wid': '_%s' % partner_id, + 'uiconf_id': ui_id, + 'flashvars[referenceId]': reference_id, + } + + if self.LOGGED_IN: + kaltura_session = self._download_json( + '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), + video_id, 'Downloading kaltura session JSON', + 'Unable to download kaltura session JSON', fatal=False, + headers={'Accept': 'application/json'}) + if kaltura_session: + session = kaltura_session.get('session') + if session: + query['flashvars[ks]'] = session + + return self.url_result(update_url_query( + 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), + 'Kaltura') + + +class SafariApiIE(SafariBaseIE): + IE_NAME = 'safari:api' + _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' + + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + part = self._download_json( + url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), + 'Downloading part JSON') + web_url = part['web_url'] + if 'library/view' in web_url: + web_url = web_url.replace('library/view', 'videos') + natural_keys = part['natural_key'] + web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}' + return self.url_result(web_url, SafariIE.ie_key()) + + +class SafariCourseIE(SafariBaseIE): + IE_NAME = 'safari:course' + IE_DESC = 'safaribooksonline.com online courses' + + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ + (?: + library/view/[^/]+| + api/v1/book| + videos/[^/]+ + )| + techbus\.safaribooksonline\.com + ) + /(?P<id>[^/]+) + ''' + + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', + 'info_dict': { + 'id': '9780133392838', + 'title': 'Hadoop Fundamentals LiveLessons', + }, + 'playlist_count': 22, + 'skip': 'Requires safaribooksonline account credentials', + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', + 'only_matching': True, + }, { + 'url': 'http://techbus.safaribooksonline.com/9780134426365', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', + 'only_matching': True, + }, { + 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', + 'only_matching': True, + }, { + 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url) + else super(SafariCourseIE, cls).suitable(url)) + + def _real_extract(self, url): + course_id = self._match_id(url) + + course_json = self._download_json( + '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), + course_id, 'Downloading course 
JSON') + + if 'chapters' not in course_json: + raise ExtractorError( + 'No chapters found for course %s' % course_id, expected=True) + + entries = [ + self.url_result(chapter, SafariApiIE.ie_key()) + for chapter in course_json['chapters']] + + course_title = course_json['title'] + + return self.playlist_result(entries, course_id, course_title) diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py new file mode 100644 index 0000000..a5f05e1 --- /dev/null +++ b/yt_dlp/extractor/saitosan.py @@ -0,0 +1,75 @@ +from .common import InfoExtractor +from ..utils import ExtractorError, try_get + + +class SaitosanIE(InfoExtractor): + _WORKING = False + IE_NAME = 'Saitosan' + _VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.saitosan.net/bview.html?id=10031846', + 'info_dict': { + 'id': '10031846', + 'ext': 'mp4', + 'title': '井下原 和弥', + 'uploader': '井下原 和弥', + 'thumbnail': 'http://111.171.196.85:8088/921f916f-7f55-4c97-b92e-5d9d0fef8f5f/thumb', + 'is_live': True, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Broadcasts are ephemeral', + }, + { + 'url': 'http://www.saitosan.net/bview.html?id=10031795', + 'info_dict': { + 'id': '10031795', + 'ext': 'mp4', + 'title': '橋本', + 'uploader': '橋本', + 'thumbnail': 'http://111.171.196.85:8088/1a3933e1-a01a-483b-8931-af15f37f8082/thumb', + 'is_live': True, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Broadcasts are ephemeral', + }] + + def _real_extract(self, url): + b_id = self._match_id(url) + + base = 'http://hankachi.saitosan-api.net:8002/socket.io/?transport=polling&EIO=3' + sid = self._download_socket_json(base, b_id, note='Opening socket').get('sid') + base += '&sid=' + sid + + self._download_webpage(base, b_id, note='Polling socket') + payload = '420["room_start_join",{"room_id":"%s"}]' % b_id + payload = '%s:%s' % (len(payload), payload) + + self._download_webpage(base, b_id, data=payload, note='Polling socket with payload') + response = self._download_socket_json(base, b_id, note='Polling socket') + if not response.get('ok'): + err = response.get('error') or {} + raise ExtractorError( + '%s said: %s - %s' % (self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err + else 'The socket reported that the broadcast could not be joined. 
Maybe it\'s offline or the URL is incorrect', + expected=True, video_id=b_id) + + self._download_webpage(base, b_id, data='26:421["room_finish_join",{}]', note='Polling socket') + b_data = self._download_socket_json(base, b_id, note='Getting broadcast metadata from socket') + m3u8_url = b_data.get('url') + + self._download_webpage(base, b_id, data='1:1', note='Closing socket', fatal=False) + + return { + 'id': b_id, + 'title': b_data.get('name'), + 'formats': self._extract_m3u8_formats(m3u8_url, b_id, 'mp4', live=True), + 'thumbnail': m3u8_url.replace('av.m3u8', 'thumb'), + 'uploader': try_get(b_data, lambda x: x['broadcast_user']['name']), # same as title + 'is_live': True + } diff --git a/yt_dlp/extractor/samplefocus.py b/yt_dlp/extractor/samplefocus.py new file mode 100644 index 0000000..e9f5c22 --- /dev/null +++ b/yt_dlp/extractor/samplefocus.py @@ -0,0 +1,97 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + get_element_by_attribute, + int_or_none, +) + + +class SampleFocusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)' + _TESTS = [{ + 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar', + 'md5': '48c8d62d60be467293912e0e619a5120', + 'info_dict': { + 'id': '40316', + 'display_id': 'lil-peep-sad-emo-guitar', + 'ext': 'mp3', + 'title': 'Lil Peep Sad Emo Guitar', + 'thumbnail': r're:^https?://.+\.png', + 'license': 'Standard License', + 'uploader': 'CapsCtrl', + 'uploader_id': 'capsctrl', + 'like_count': int, + 'comment_count': int, + 'categories': ['Samples', 'Guitar', 'Electric guitar'], + }, + }, { + 'url': 'https://samplefocus.com/samples/dababy-style-bass-808', + 'only_matching': True + }, { + 'url': 'https://samplefocus.com/samples/young-chop-kick', + 'only_matching': True + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + sample_id = self._search_regex( + r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)', + webpage, 'sample id', group='id') + + title = self._og_search_title(webpage, fatal=False) or self._html_search_regex( + r'<h1>(.+?)</h1>', webpage, 'title') + + mp3_url = self._search_regex( + r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)', + webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex( + r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>', + webpage, 'mp3 url', group=0))['content'] + + thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex( + r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)', + webpage, 'mp3', fatal=False, group='url') + + comments = [] + for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage): + comments.append({ + 'author': author, + 'author_id': author_id, + 'text': body, + }) + + uploader_id = uploader = None + mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage) + if mobj: + uploader_id, uploader = mobj.groups() + + breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage) + categories = [] + if breadcrumb: + for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb): + categories.append(name) + + def extract_count(klass): + return int_or_none(self._html_search_regex( + r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass, + webpage, klass, 
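# counters are optional page furniture, so a missing element should not abort extraction +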
fatal=False)) + + return { + 'id': sample_id, + 'title': title, + 'url': mp3_url, + 'display_id': display_id, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'license': self._html_search_regex( + r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<', + webpage, 'license', fatal=False, group='license'), + 'uploader_id': uploader_id, + 'like_count': extract_count('sample-%s-favorites' % sample_id), + 'comment_count': extract_count('comments'), + 'comments': comments, + 'categories': categories, + } diff --git a/yt_dlp/extractor/sapo.py b/yt_dlp/extractor/sapo.py new file mode 100644 index 0000000..beffaee --- /dev/null +++ b/yt_dlp/extractor/sapo.py @@ -0,0 +1,114 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, +) + + +class SapoIE(InfoExtractor): + IE_DESC = 'SAPO Vídeos' + _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})' + + _TESTS = [ + { + 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi', + 'md5': '79ee523f6ecb9233ac25075dee0eda83', + 'note': 'SD video', + 'info_dict': { + 'id': 'UBz95kOtiWYUMTA5Ghfi', + 'ext': 'mp4', + 'title': 'Benfica - Marcas na Hitória', + 'description': 'md5:c9082000a128c3fd57bf0299e1367f22', + 'duration': 264, + 'uploader': 'tiago_1988', + 'upload_date': '20080229', + 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'], + }, + }, + { + 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF', + 'md5': '90a2f283cfb49193fe06e861613a72aa', + 'note': 'HD video', + 'info_dict': { + 'id': 'IyusNAZ791ZdoCY5H5IF', + 'ext': 'mp4', + 'title': 'Codebits VII - Report', + 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8', + 'duration': 144, + 'uploader': 'codebits', + 'upload_date': '20140427', + 'categories': ['codebits', 'codebits2014'], + }, + }, + { + 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz', + 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac', + 'note': 'v2 video', + 'info_dict': { + 'id': 'yLqjzPtbTimsn2wWBKHz', + 'ext': 'mp4', + 'title': 'Hipnose Condicionativa 4', + 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40', + 'duration': 692, + 'uploader': 'sapozen', + 'upload_date': '20090609', + 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'], + }, + }, + ] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + video_id = mobj.group('id') + + item = self._download_xml( + 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item') + + title = item.find('./title').text + description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text + thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url') + duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text) + uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text + upload_date = unified_strdate(item.find('./pubDate').text) + view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text) + comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text) + tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text + categories = tags.split() if tags else [] + age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0 + + video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text + video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x') + + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'format_id': 'sd', + 
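+                # <videoSize> is given as 'WIDTHxHEIGHT'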
+                'width': int(video_size[0]),
+                'height': int(video_size[1]),
+            }]
+
+            if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true':
+                formats.append({
+                    'url': re.sub(r'/mov/1$', '/mov/39', video_url),
+                    'ext': 'mp4',
+                    'format_id': 'hd',
+                    'width': 1280,
+                    'height': 720,
+                })
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'uploader': uploader,
+                'upload_date': upload_date,
+                'view_count': view_count,
+                'comment_count': comment_count,
+                'categories': categories,
+                'age_limit': age_limit,
+                'formats': formats,
+            }
diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py
new file mode 100644
index 0000000..8d61e22
--- /dev/null
+++ b/yt_dlp/extractor/sbs.py
@@ -0,0 +1,156 @@
+from .common import InfoExtractor
+from ..networking import HEADRequest
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    traverse_obj,
+    update_url_query,
+    url_or_none,
+)
+
+
+class SBSIE(InfoExtractor):
+    IE_DESC = 'sbs.com.au'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?sbs\.com\.au/(?:
+            ondemand(?:
+                /video/(?:single/)?|
+                /(?:movie|tv-program)/[^/]+/|
+                /(?:tv|news)-series/(?:[^/]+/){3}|
+                .*?\bplay=|/watch/
+            )|news/(?:embeds/)?video/
+        )(?P<id>[0-9]+)'''
+    _EMBED_REGEX = [r'''(?x)
+        (?:
+            <meta\s+property="og:video"\s+content=|
+            <iframe[^>]+?src=
+        )
+        (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''']
+
+    _TESTS = [{
+        # Original URL is handled by the generic IE which finds the iframe:
+        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
+        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
+        'md5': '31f84a7a19b53635db63c73f8ab0c4a7',
+        'info_dict': {
+            'id': '320403011771',  # '_rFBPRPO4pMR',
+            'ext': 'mp4',
+            'title': 'Dingo Conservation (The Feed)',
+            'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
+            'thumbnail': r're:https?://.*\.jpg',
+            'duration': 308,
+            'timestamp': 1408613220,
+            'upload_date': '20140821',
+            'uploader': 'SBSC',
+        },
+        'expected_warnings': ['Unable to download JSON metadata'],
+    }, {
+        'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931',
+        'only_matching': True,
+    }, {
+        'note': 'Live stream',
+        'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/news-series/dateline/dateline-2022/dateline-s2022-ep26/2072245827515',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sbs.com.au/ondemand/tv-program/autun-romes-forgotten-sister/2116212803602',
+        'only_matching': True,
+    }]
+
+    _GEO_COUNTRIES = ['AU']
+    _AUS_TV_PARENTAL_GUIDELINES = {
+        'P': 0,
+        'C': 7,
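+        # approximate minimum ages for the Australian TV classification codes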
+ 'G': 0, + 'PG': 0, + 'M': 14, + 'MA15+': 15, + 'MAV15+': 15, + 'R18+': 18, + } + _PLAYER_API = 'https://www.sbs.com.au/api/v3' + + def _real_extract(self, url): + video_id = self._match_id(url) + formats, subtitles = self._extract_smil_formats_and_subtitles( + update_url_query(f'{self._PLAYER_API}/video_smil', {'id': video_id}), video_id) + + if not formats: + urlh = self._request_webpage( + HEADRequest('https://sbs-vod-prod-01.akamaized.net/'), video_id, + note='Checking geo-restriction', fatal=False, expected_status=403) + if urlh: + error_reasons = urlh.headers.get_all('x-error-reason') or [] + if 'geo-blocked' in error_reasons: + self.raise_geo_restricted(countries=['AU']) + self.raise_no_formats('No formats are available', video_id=video_id) + + media = traverse_obj(self._download_json( + f'{self._PLAYER_API}/video_stream', video_id, fatal=False, + query={'id': video_id, 'context': 'tv'}), ('video_object', {dict})) or {} + + media.update(self._download_json( + f'https://catalogue.pr.sbsod.com/mpx-media/{video_id}', + video_id, fatal=not media) or {}) + + # For named episodes, use the catalogue's title to set episode, rather than generic 'Episode N'. + if traverse_obj(media, ('partOfSeries', {dict})): + media['epName'] = traverse_obj(media, ('title', {str})) + + return { + 'id': video_id, + **traverse_obj(media, { + 'title': ('name', {str}), + 'description': ('description', {str}), + 'channel': ('taxonomy', 'channel', 'name', {str}), + 'series': ((('partOfSeries', 'name'), 'seriesTitle'), {str}), + 'series_id': ((('partOfSeries', 'uuid'), 'seriesID'), {str}), + 'season_number': ('seasonNumber', {int_or_none}), + 'episode': ('epName', {str}), + 'episode_number': ('episodeNumber', {int_or_none}), + 'timestamp': (('datePublished', ('publication', 'startDate')), {parse_iso8601}), + 'release_year': ('releaseYear', {int_or_none}), + 'duration': ('duration', ({float_or_none}, {parse_duration})), + 'is_live': ('liveStream', {bool}), + 'age_limit': (('classificationID', 'contentRating'), {str.upper}, { + lambda x: self._AUS_TV_PARENTAL_GUIDELINES.get(x)}), # dict.get is unhashable in py3.7 + }, get_all=False), + **traverse_obj(media, { + 'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}), + 'tags': (('consumerAdviceTexts', ('sbsSubCertification', 'consumerAdvice')), ..., {str}), + 'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['contentUrl']), { + 'id': ('name', {str}), + 'url': 'contentUrl', + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + }), + 'formats': formats, + 'subtitles': subtitles, + 'uploader': 'SBSC', + } diff --git a/yt_dlp/extractor/sbscokr.py b/yt_dlp/extractor/sbscokr.py new file mode 100644 index 0000000..001d19e --- /dev/null +++ b/yt_dlp/extractor/sbscokr.py @@ -0,0 +1,200 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + parse_iso8601, + parse_resolution, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class SBSCoKrIE(InfoExtractor): + IE_NAME = 'sbs.co.kr' + _VALID_URL = [r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Package)?EndPage\.do\?(?:[^#]+&)?mdaId=(?P<id>\d+)', + r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv|kth)/[a-z0-9]+/(?:vod|clip|movie)/\d+/(?P<id>(?:OC)?\d+)'] + + _TESTS = [{ + 'url': 'https://programs.sbs.co.kr/enter/dongsang2/clip/52007/OC467706746?div=main_pop_clip', + 'md5': 'c3f6d45e1fb5682039d94cda23c36f19', + 'info_dict': { + 'id': 'OC467706746', + 'ext': 'mp4', + 'title': '‘아슬아슬’ 박군♥한영의 새 집 
인테리어 대첩♨', + 'description': 'md5:6a71eb1979ee4a94ea380310068ccab4', + 'thumbnail': 'https://img2.sbs.co.kr/ops_clip_img/2023/10/10/34c4c0f9-a9a5-4ff6-a92e-9bb4b5f6fa65915w1280.jpg', + 'release_timestamp': 1696889400, + 'release_date': '20231009', + 'view_count': int, + 'like_count': int, + 'duration': 238, + 'age_limit': 15, + 'series': '동상이몽2_너는 내 운명', + 'episode': '레이디제인, ‘혼전임신설’ ‘3개월’ 앞당긴 결혼식 비하인드 스토리 최초 공개!', + 'episode_number': 311, + }, + }, { + 'url': 'https://allvod.sbs.co.kr/allvod/vodPackageEndPage.do?mdaId=22000489324&combiId=PA000000284&packageType=A&isFreeYN=', + 'md5': 'bf46b2e89fda7ae7de01f5743cef7236', + 'info_dict': { + 'id': '22000489324', + 'ext': 'mp4', + 'title': '[다시보기] 트롤리 15회', + 'description': 'md5:0e55d74bef1ac55c61ae90c73ac485f4', + 'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/02/14/arC1676333794938-1280-720.jpg', + 'release_timestamp': 1676325600, + 'release_date': '20230213', + 'view_count': int, + 'like_count': int, + 'duration': 5931, + 'age_limit': 15, + 'series': '트롤리', + 'episode': '이거 다 거짓말이야', + 'episode_number': 15, + }, + }, { + 'url': 'https://programs.sbs.co.kr/enter/fourman/vod/69625/22000508948', + 'md5': '41e8ae4cc6c8424f4e4d76661a4becbf', + 'info_dict': { + 'id': '22000508948', + 'ext': 'mp4', + 'title': '[다시보기] 신발 벗고 돌싱포맨 104회', + 'description': 'md5:c6a247383c4dd661e4b956bf4d3b586e', + 'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/08/30/2vb1693355446261-1280-720.jpg', + 'release_timestamp': 1693342800, + 'release_date': '20230829', + 'view_count': int, + 'like_count': int, + 'duration': 7036, + 'age_limit': 15, + 'series': '신발 벗고 돌싱포맨', + 'episode': '돌싱포맨 저격수들 등장!', + 'episode_number': 104, + }, + }] + + def _call_api(self, video_id, rscuse=''): + return self._download_json( + f'https://api.play.sbs.co.kr/1.0/sbs_vodall/{video_id}', video_id, + note=f'Downloading m3u8 information {rscuse}', + query={ + 'platform': 'pcweb', + 'protocol': 'download', + 'absolute_show': 'Y', + 'service': 'program', + 'ssl': 'Y', + 'rscuse': rscuse, + }) + + def _real_extract(self, url): + video_id = self._match_id(url) + + details = self._call_api(video_id) + source = traverse_obj(details, ('vod', 'source', 'mediasource', {dict})) or {} + + formats = [] + for stream in traverse_obj(details, ( + 'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse'] + ), default=[source]): + if not stream.get('mediaurl'): + new_source = traverse_obj( + self._call_api(video_id, rscuse=stream['mediarscuse']), + ('vod', 'source', 'mediasource', {dict})) or {} + if new_source.get('mediarscuse') == source.get('mediarscuse') or not new_source.get('mediaurl'): + continue + stream = new_source + formats.append({ + 'url': stream['mediaurl'], + 'format_id': stream.get('mediarscuse'), + 'format_note': stream.get('medianame'), + **parse_resolution(stream.get('quality')), + 'preference': int_or_none(stream.get('mediarscuse')) + }) + + caption_url = traverse_obj(details, ('vod', 'source', 'subtitle', {url_or_none})) + + return { + 'id': video_id, + **traverse_obj(details, ('vod', { + 'title': ('info', 'title'), + 'duration': ('info', 'duration', {int_or_none}), + 'view_count': ('info', 'viewcount', {int_or_none}), + 'like_count': ('info', 'likecount', {int_or_none}), + 'description': ('info', 'synopsis', {clean_html}), + 'episode': ('info', 'content', ('contenttitle', 'title')), + 'episode_number': ('info', 'content', 'number', {int_or_none}), + 'series': ('info', 'program', 'programtitle'), + 'age_limit': ('info', 'targetage', {int_or_none}), + 
'release_timestamp': ('info', 'broaddate', {parse_iso8601}), + 'thumbnail': ('source', 'thumbnail', 'origin', {url_or_none}), + }), get_all=False), + 'formats': formats, + 'subtitles': {'ko': [{'url': caption_url}]} if caption_url else None, + } + + +class SBSCoKrAllvodProgramIE(InfoExtractor): + IE_NAME = 'sbs.co.kr:allvod_program' + _VALID_URL = r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Free)?ProgramDetail\.do\?(?:[^#]+&)?pgmId=(?P<id>P?\d+)' + + _TESTS = [{ + 'url': 'https://allvod.sbs.co.kr/allvod/vodFreeProgramDetail.do?type=legend&pgmId=22000010159&listOrder=vodCntAsc', + 'info_dict': { + '_type': 'playlist', + 'id': '22000010159', + }, + 'playlist_count': 18, + }, { + 'url': 'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId=P460810577', + 'info_dict': { + '_type': 'playlist', + 'id': 'P460810577', + }, + 'playlist_count': 13, + }] + + def _real_extract(self, url): + program_id = self._match_id(url) + + details = self._download_json( + 'https://allvod.sbs.co.kr/allvod/vodProgramDetail/vodProgramDetailAjax.do', + program_id, note='Downloading program details', + query={ + 'pgmId': program_id, + 'currentCount': '10000', + }) + + return self.playlist_result( + [self.url_result(f'https://allvod.sbs.co.kr/allvod/vodEndPage.do?mdaId={video_id}', SBSCoKrIE) + for video_id in traverse_obj(details, ('list', ..., 'mdaId'))], program_id) + + +class SBSCoKrProgramsVodIE(InfoExtractor): + IE_NAME = 'sbs.co.kr:programs_vod' + _VALID_URL = r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv)/(?P<id>[a-z0-9]+)/vods' + + _TESTS = [{ + 'url': 'https://programs.sbs.co.kr/culture/morningwide/vods/65007', + 'info_dict': { + '_type': 'playlist', + 'id': '00000210215', + }, + 'playlist_mincount': 9782, + }, { + 'url': 'https://programs.sbs.co.kr/enter/dongsang2/vods/52006', + 'info_dict': { + '_type': 'playlist', + 'id': '22000010476', + }, + 'playlist_mincount': 312, + }] + + def _real_extract(self, url): + program_slug = self._match_id(url) + + program_id = self._download_json( + f'https://static.apis.sbs.co.kr/program-api/1.0/menu/{program_slug}', program_slug, + note='Downloading program menu data')['program']['programid'] + + return self.url_result( + f'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId={program_id}', SBSCoKrAllvodProgramIE) diff --git a/yt_dlp/extractor/screen9.py b/yt_dlp/extractor/screen9.py new file mode 100644 index 0000000..5ab0b6c --- /dev/null +++ b/yt_dlp/extractor/screen9.py @@ -0,0 +1,62 @@ +from .common import InfoExtractor +from ..utils import traverse_obj + + +class Screen9IE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.screen9\.(?:tv|com)|play\.su\.se)/(?:embed|media)/(?P<id>[^?#/]+)' + _TESTS = [ + { + 'url': 'https://api.screen9.com/embed/8kTNEjvoXGM33dmWwF0uDA', + 'md5': 'd60d23f8980583b930724b01fa6ddb41', + 'info_dict': { + 'id': '8kTNEjvoXGM33dmWwF0uDA', + 'ext': 'mp4', + 'title': 'Östersjön i förändrat klimat', + 'thumbnail': r're:^https?://.+\.jpg', + }, + }, + { + 'url': 'https://folkhogskolekanalen.screen9.tv/media/gy35PKLHe-5K29RYHga2bw/ett-starkare-samhalle-en-snabbguide-om-sveriges-folkhogskolor', + 'md5': 'c9389806e78573ea34fc48b6f94465dc', + 'info_dict': { + 'id': 'gy35PKLHe-5K29RYHga2bw', + 'ext': 'mp4', + 'title': 'Ett starkare samhälle - en snabbguide om Sveriges folkhögskolor', + 'thumbnail': r're:^https?://.+\.jpg', + }, + }, + { + 'url': 'https://play.su.se/media/H1YA0EYNCxiesrSU1kaRBQ/baltic-breakfast', + 'md5': '2b817647c3058002526269deff4c0683', + 'info_dict': { + 'id': 'H1YA0EYNCxiesrSU1kaRBQ', + 'ext': 
'mp4', + 'title': 'Baltic Breakfast', + 'thumbnail': r're:^https?://.+\.jpg', + }, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(f'https://api.screen9.com/embed/{video_id}', video_id) + config = self._search_json(r'var\s+config\s*=', webpage, 'config', video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + traverse_obj(config, ('src', lambda _, v: v['type'] == 'application/x-mpegURL', 'src'), get_all=False), + video_id, ext='mp4') + formats.append({ + 'url': traverse_obj(config, ('src', lambda _, v: v['type'] == 'video/mp4', 'src'), get_all=False), + 'format': 'mp4', + }) + + return { + 'id': video_id, + 'title': traverse_obj( + config, + ('plugins', (('title', 'title'), ('googleAnalytics', 'title'), ('share', 'mediaTitle'))), + get_all=False), + 'description': traverse_obj(config, ('plugins', 'title', 'description')), + 'thumbnail': traverse_obj(config, ('poster')), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py new file mode 100644 index 0000000..df5e79b --- /dev/null +++ b/yt_dlp/extractor/screencast.py @@ -0,0 +1,117 @@ +import urllib.request + +from .common import InfoExtractor +from ..compat import compat_parse_qs +from ..utils import ExtractorError + + +class ScreencastIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://www.screencast.com/t/3ZEjQXlT', + 'md5': '917df1c13798a3e96211dd1561fded83', + 'info_dict': { + 'id': '3ZEjQXlT', + 'ext': 'm4v', + 'title': 'Color Measurement with Ocean Optics Spectrometers', + 'description': 'md5:240369cde69d8bed61349a199c5fb153', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', + 'md5': 'e8e4b375a7660a9e7e35c33973410d34', + 'info_dict': { + 'id': 'V2uXehPJa1ZI', + 'ext': 'mov', + 'title': 'The Amadeus Spectrometer', + 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/aAB3iowa', + 'md5': 'dedb2734ed00c9755761ccaee88527cd', + 'info_dict': { + 'id': 'aAB3iowa', + 'ext': 'mp4', + 'title': 'Google Earth Export', + 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/X3ddTrYh', + 'md5': '669ee55ff9c51988b4ebc0877cc8b159', + 'info_dict': { + 'id': 'X3ddTrYh', + 'ext': 'wmv', + 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', + 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://screencast.com/t/aAB3iowa', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'<embed name="Video".*?src="([^"]+)"', webpage, + 'QuickTime embed', default=None) + + if video_url is None: + flash_vars_s = self._html_search_regex( + r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars', + default=None) + if not flash_vars_s: + flash_vars_s = self._html_search_regex( + r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars', + default=None) + if flash_vars_s: + flash_vars_s = flash_vars_s.replace(',', '&') + if 
flash_vars_s: + flash_vars = compat_parse_qs(flash_vars_s) + video_url_raw = urllib.request.quote( + flash_vars['content'][0]) + video_url = video_url_raw.replace('http%3A', 'http:') + + if video_url is None: + video_meta = self._html_search_meta( + 'og:video', webpage, default=None) + if video_meta: + video_url = self._search_regex( + r'src=(.*?)(?:$|&)', video_meta, + 'meta tag video URL', default=None) + + if video_url is None: + video_url = self._html_search_regex( + r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', default=None, group='url') + + if video_url is None: + video_url = self._html_search_meta( + 'og:video', webpage, default=None) + + if video_url is None: + raise ExtractorError('Cannot find video') + + title = self._og_search_title(webpage, default=None) + if title is None: + title = self._html_search_regex( + [r'<b>Title:</b> ([^<]+)</div>', + r'class="tabSeperator">></span><span class="tabText">(.+?)<', + r'<title>([^<]+)'], + webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage, default=None) + if description is None: + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/yt_dlp/extractor/screencastify.py b/yt_dlp/extractor/screencastify.py new file mode 100644 index 0000000..3c43043 --- /dev/null +++ b/yt_dlp/extractor/screencastify.py @@ -0,0 +1,70 @@ +import urllib.parse + +from .common import InfoExtractor +from ..utils import traverse_obj, update_url_query + + +class ScreencastifyIE(InfoExtractor): + _VALID_URL = [ + r'https?://watch\.screencastify\.com/v/(?P[^/?#]+)', + r'https?://app\.screencastify\.com/v[23]/watch/(?P[^/?#]+)', + ] + _TESTS = [{ + 'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8', + 'info_dict': { + 'id': 'sYVkZip3quLKhHw4Ybk8', + 'ext': 'mp4', + 'title': 'Inserting and Aligning the Case Top and Bottom', + 'description': '', + 'uploader': 'Paul Gunn', + 'extra_param_to_segment_url': str, + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + 'url': 'https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t', + 'info_dict': { + 'id': 'J5N7H11wofDN1jZUCr3t', + 'ext': 'mp4', + 'uploader': 'Scott Piesen', + 'description': '', + 'title': 'Lesson Recording 1-17 Burrr...', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + 'url': 'https://app.screencastify.com/v2/watch/BQ26VbUdfbQLhKzkktOk', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._download_json( + f'https://umbrella.svc.screencastify.com/api/umbrellaService/watch/{video_id}', video_id) + + query_string = traverse_obj(info, ('manifest', 'auth', 'query')) + query = urllib.parse.parse_qs(query_string) + formats = [] + dash_manifest_url = traverse_obj(info, ('manifest', 'url')) + if dash_manifest_url: + formats.extend( + self._extract_mpd_formats( + dash_manifest_url, video_id, mpd_id='dash', query=query, fatal=False)) + hls_manifest_url = traverse_obj(info, ('manifest', 'hlsUrl')) + if hls_manifest_url: + formats.extend( + self._extract_m3u8_formats( + hls_manifest_url, video_id, ext='mp4', m3u8_id='hls', query=query, fatal=False)) + for f in formats: + f['url'] = update_url_query(f['url'], query) + + return { + 'id': video_id, + 'title': info.get('title'), + 'description': info.get('description'), + 'uploader': info.get('userName'), + 'formats': formats, + 
'extra_param_to_segment_url': query_string,
+ }
diff --git a/yt_dlp/extractor/screencastomatic.py b/yt_dlp/extractor/screencastomatic.py
new file mode 100644
index 0000000..28e25e9
--- /dev/null
+++ b/yt_dlp/extractor/screencastomatic.py
@@ -0,0 +1,72 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_class,
+ int_or_none,
+ remove_start,
+ strip_or_none,
+ unified_strdate,
+ urlencode_postdata,
+)
+
+
+class ScreencastOMaticIE(InfoExtractor):
+ _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
+ 'md5': '483583cb80d92588f15ccbedd90f0c18',
+ 'info_dict': {
+ 'id': 'c2lD3BeOPl',
+ 'ext': 'mp4',
+ 'title': 'Welcome to 3-4 Philosophy @ DECV!',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
+ 'duration': 369,
+ 'upload_date': '20141216',
+ }
+ }, {
+ 'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://screencast-o-matic.com/player/' + video_id, video_id)
+
+ if (self._html_extract_title(webpage) == 'Protected Content'
+ or 'This video is private and requires a password' in webpage):
+ password = self.get_param('videopassword')
+
+ if not password:
+ raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
+
+ form = self._search_regex(
+ r'(?is)<form[^>]*>(?P<form>.+?)</form>
', webpage, 'login form', group='form') + form_data = self._hidden_inputs(form) + form_data.update({ + 'scPassword': password, + }) + + webpage = self._download_webpage( + 'https://screencast-o-matic.com/player/password', video_id, 'Logging in', + data=urlencode_postdata(form_data)) + + if 'Invalid password' in webpage: + raise ExtractorError('Unable to login: Invalid password', expected=True) + + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + info.update({ + 'id': video_id, + 'title': get_element_by_class('overlayTitle', webpage), + 'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None, + 'duration': int_or_none(self._search_regex( + r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};', + webpage, 'duration', default=None)), + 'upload_date': unified_strdate(remove_start( + get_element_by_class('overlayPublished', webpage), 'Published: ')), + }) + return info diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py new file mode 100644 index 0000000..3912f77 --- /dev/null +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -0,0 +1,155 @@ +import json +import hashlib + +from .aws import AWSIE +from .anvato import AnvatoIE +from .common import InfoExtractor +from ..utils import ( + smuggle_url, + urlencode_postdata, + xpath_text, +) + + +class ScrippsNetworksWatchIE(AWSIE): + IE_NAME = 'scrippsnetworks:watch' + _VALID_URL = r'''(?x) + https?:// + watch\. + (?Pgeniuskitchen)\.com/ + (?: + player\.[A-Z0-9]+\.html\#| + show/(?:[^/]+/){2}| + player/ + ) + (?P\d+) + ''' + _TESTS = [{ + 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/', + 'info_dict': { + 'id': '4194875', + 'ext': 'mp4', + 'title': 'Ample Hills Ice Cream Bike', + 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.', + 'uploader': 'ANV', + 'upload_date': '20171011', + 'timestamp': 1507698000, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [AnvatoIE.ie_key()], + 'skip': '404 Not Found', + }] + + _SNI_TABLE = { + 'geniuskitchen': 'genius', + } + + _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' + _AWS_PROXY_HOST = 'web.api.video.snidigital.com' + + _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + site_id, video_id = mobj.group('site', 'id') + + aws_identity_id_json = json.dumps({ + 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION + }).encode('utf-8') + token = self._download_json( + 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, + data=aws_identity_id_json, + headers={ + 'Accept': '*/*', + 'Content-Type': 'application/x-amz-json-1.1', + 'Referer': url, + 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), + 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', + 'X-Amz-User-Agent': self._AWS_USER_AGENT, + })['Token'] + + sts = self._download_xml( + 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({ + 'Action': 'AssumeRoleWithWebIdentity', + 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role', + 'RoleSessionName': 'web-identity', + 'Version': '2011-06-15', + 'WebIdentityToken': token, + }), headers={ + 'Referer': url, + 'X-Amz-User-Agent': self._AWS_USER_AGENT, + 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', + }) + + def get(key): + return xpath_text( + sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, + fatal=True) + + mcp_id = 
self._aws_execute_api({ + 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), + 'access_key': get('AccessKeyId'), + 'secret_key': get('SecretAccessKey'), + 'session_token': get('SessionToken'), + }, video_id)['results'][0]['mcpId'] + + return self.url_result( + smuggle_url( + 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + {'geo_countries': ['US']}), + AnvatoIE.ie_key(), video_id=mcp_id) + + +class ScrippsNetworksIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?Pcookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P\d+)' + _TESTS = [{ + 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338', + 'info_dict': { + 'id': '0260338', + 'ext': 'mp4', + 'title': 'The Best of the Best', + 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.', + 'timestamp': 1475678834, + 'upload_date': '20161005', + 'uploader': 'SCNI-SCND', + 'tags': 'count:10', + 'creator': 'Cooking Channel', + 'duration': 29.995, + 'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': ''}], + 'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg', + }, + 'add_ie': ['ThePlatform'], + 'expected_warnings': ['No HLS formats found'], + }, { + 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790', + 'only_matching': True, + }, { + 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591', + 'only_matching': True, + }, { + 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929', + 'only_matching': True, + }, { + 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', + 'only_matching': True, + }, { + 'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368', + 'only_matching': True, + }] + _ACCOUNT_MAP = { + 'cookingchanneltv': 2433005105, + 'discovery': 2706091867, + 'diynetwork': 2433004575, + 'foodnetwork': 2433005105, + 'hgtv': 2433004575, + 'travelchannel': 2433005739, + } + _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true' + + def _real_extract(self, url): + site, guid = self._match_valid_url(url).groups() + return self.url_result(smuggle_url( + self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid), + {'force_smil_url': True}), 'ThePlatform', guid) diff --git a/yt_dlp/extractor/scrolller.py b/yt_dlp/extractor/scrolller.py new file mode 100644 index 0000000..4f9fa14 --- /dev/null +++ b/yt_dlp/extractor/scrolller.py @@ -0,0 +1,102 @@ +import json + +from .common import InfoExtractor +from ..utils import determine_ext, int_or_none + + +class ScrolllerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw', + 'info_dict': { + 'id': 'a-helping-hand-1k9pxikxkw', + 'ext': 'mp4', + 'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg', + 'title': 'A helping hand', + 'age_limit': 0, + } + }, { + 'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j', + 'info_dict': { + 'id': 'tigers-chasing-a-drone-c5d1f2so6j', + 'ext': 'mp4', + 'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg', + 'title': 'Tigers chasing a drone', + 'age_limit': 0, + } + }, { + 'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p', + 'info_dict': { + 'id': 'baby-rhino-smells-something-9chhugsv9p', + 'ext': 'mp4', + 
'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
+ 'title': 'Baby rhino smells something',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
+ 'info_dict': {
+ 'id': 'its-all-fun-and-games-cco8jjmoh7',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
+ 'title': 'It\'s all fun and games...',
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
+ 'info_dict': {
+ 'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
+ 'title': 'May the force be with you (Octokuro)',
+ 'age_limit': 18,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ query = {
+ 'query': '''{
+ getSubredditPost(url:"/%s"){
+ id
+ title
+ isNsfw
+ mediaSources{
+ url
+ width
+ height
+ }
+ }
+ }''' % video_id
+ }
+
+ video_data = self._download_json(
+ 'https://api.scrolller.com/api/v2/graphql', video_id, data=json.dumps(query).encode(),
+ headers={'Content-Type': 'application/json'})['data']['getSubredditPost']
+
+ formats, thumbnails = [], []
+ for source in video_data['mediaSources']:
+ if determine_ext(source.get('url')) in ('jpg', 'png'):
+ thumbnails.append({
+ 'url': source['url'],
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ elif source.get('url'):
+ formats.append({
+ 'url': source['url'],
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+
+ if not formats:
+ self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
+
+ return {
+ 'id': video_id,
+ 'title': video_data.get('title'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'age_limit': 18 if video_data.get('isNsfw') else 0
+ }
diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py
new file mode 100644
index 0000000..9c2ca8c
--- /dev/null
+++ b/yt_dlp/extractor/scte.py
@@ -0,0 +1,137 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ decode_packed_codes,
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class SCTEBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
+ _NETRC_MACHINE = 'scte'
+
+ def _perform_login(self, username, password):
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']welcome\b', r'>Sign Out<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._hidden_inputs(login_popup)
+
+ login_form.update({
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ if '|pageRedirect|' not in response and not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</a>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class SCTEIE(SCTEBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
+ 'info_dict': {
+ 'title': 'Introduction to DOCSIS Engineering Professional',
+ 'id': '31484',
+ },
+ 'playlist_count': 5,
+ 'skip': 'Requires account credentials',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+ context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
+ content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
+ context = decode_packed_codes(self._download_webpage(
+ '%smobile/data.js' % content_base, video_id))
+
+ data = self._parse_xml(
+ self._search_regex(
+ r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
+ video_id)
+
+ entries = []
+ for asset in data.findall('.//asset'):
+ asset_url = asset.get('url')
+ if not asset_url or not asset_url.endswith('.mp4'):
+ continue
+ asset_id = self._search_regex(
+ r'video_([^_]+)_', asset_url, 'asset id', default=None)
+ if not asset_id:
+ continue
+ entries.append({
+ 'id': asset_id,
+ 'title': title,
+ 'url': content_base + asset_url,
+ })
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class SCTECourseIE(SCTEBaseIE):
+ _WORKING = False
+ _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3639',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3073',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_id)
+
+ title = self._search_regex(
+ r'<h1>(.+?)</h1>
', webpage, 'title', default=None)
+
+ entries = []
+ for mobj in re.finditer(
+ r'''(?x)
+ <a[^>]+
+ href=(["\'])
+ (?P<url>
+ https?://learning\.scte\.org/mod/
+ (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
+ \bid=\d+
+ )
+ ''',
+ webpage):
+ item_url = mobj.group('url')
+ if item_url == url:
+ continue
+ ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
+ else SCTECourseIE.ie_key())
+ entries.append(self.url_result(item_url, ie=ie))
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/yt_dlp/extractor/sejmpl.py b/yt_dlp/extractor/sejmpl.py
new file mode 100644
index 0000000..29cb015
--- /dev/null
+++ b/yt_dlp/extractor/sejmpl.py
@@ -0,0 +1,218 @@
+import datetime
+
+from .common import InfoExtractor
+from .redge import RedCDNLivxIE
+from ..utils import (
+ clean_html,
+ join_nonempty,
+ js_to_json,
+ strip_or_none,
+ update_url_query,
+)
+from ..utils.traversal import traverse_obj
+
+
+def is_dst(date):
+ last_march = datetime.datetime(date.year, 3, 31)
+ last_october = datetime.datetime(date.year, 10, 31)
+ last_sunday_march = last_march - datetime.timedelta(days=last_march.isoweekday() % 7)
+ last_sunday_october = last_october - datetime.timedelta(days=last_october.isoweekday() % 7)
+ return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)
+
+
+def rfc3339_to_atende(date):
+ date = datetime.datetime.fromisoformat(date)
+ date = date + datetime.timedelta(hours=1 if is_dst(date) else 0)
+ return int((date.timestamp() - 978307200) * 1000)
+
+
+class SejmIE(InfoExtractor):
+ _VALID_URL = (
+ r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
+ r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
+ r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
+ )
+ IE_NAME = 'sejm'
+
+ _TESTS = [{
+ # multiple cameras, Polish SL interpreter
+ 'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
+ 'info_dict': {
+ 'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
+ 'title': '1. posiedzenie Sejmu X kadencji',
+ 'duration': 20145,
+ 'live_status': 'was_live',
+ 'location': 'Sala Posiedzeń',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'ENC01-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC01',
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'ENC30-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC30',
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'ENC31-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC31',
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'ENC32-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. posiedzenie Sejmu X kadencji - ENC32',
+ 'live_status': 'was_live',
+ },
+ }, {
+ # sign lang interpreter
+ 'info_dict': {
+ 'id': 'Migacz-ENC01-1-722340000000-722360145000',
+ 'ext': 'mp4',
+ 'duration': 20145,
+ 'title': '1. 
posiedzenie Sejmu X kadencji - Migacz-ENC01', + 'live_status': 'was_live', + }, + }], + }, { + 'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2', + 'info_dict': { + 'id': '9377A9D65518E9A5C125808E002E9FF2', + 'title': 'Debata "Lepsza Polska: obywatelska"', + 'description': 'KP .Nowoczesna', + 'duration': 8770, + 'live_status': 'was_live', + 'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ENC08-1-503831270000-503840040000', + 'ext': 'mp4', + 'duration': 8770, + 'title': 'Debata "Lepsza Polska: obywatelska" - ENC08', + 'live_status': 'was_live', + }, + }], + }, { + # 7th term is very special, since it does not use redcdn livx + 'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F', + 'info_dict': { + 'id': 'A6E6D475ECCC6FE5C1257EF90034817F', + 'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu', + 'description': 'SLD - Biuro Prasowe Klubu', + 'duration': 514, + 'location': 'sala 101/bud. C', + 'live_status': 'was_live', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'A6E6D475ECCC6FE5C1257EF90034817F', + 'ext': 'mp4', + 'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu', + 'duration': 514, + }, + }], + }, { + 'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492', + 'only_matching': True, + }] + + def _real_extract(self, url): + term, video_id = self._match_valid_url(url).group('term', 'id') + frame = self._download_webpage( + f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}', + video_id) + # despite it says "transmisje_arch", it works for live streams too! + data = self._download_json( + f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}', + video_id) + params = data['params'] + + title = strip_or_none(data.get('title')) + + if data.get('status') == 'VIDEO_ENDED': + live_status = 'was_live' + elif data.get('status') == 'VIDEO_PLAYING': + live_status = 'is_live' + else: + live_status = None + self.report_warning(f'unknown status: {data.get("status")}') + + start_time = rfc3339_to_atende(params['start']) + # current streams have a stop time of *expected* end of session, but actual times + # can change during the transmission. setting a stop_time would artificially + # end the stream at that time, while the session actually keeps going. 
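+ # note: the "atende" timebase used by startTime/stopTime (and visible in the
+ # RedCDN Livx stream ids above) appears to be milliseconds since
+ # 2001-01-01T00:00:00Z -- 978307200 in rfc3339_to_atende() is the Unix
+ # timestamp of that epoch -- with the is_dst() hour shift seemingly
+ # compensating for Polish summer time in the dates returned by the API.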
+ if live_status == 'was_live':
+ stop_time = rfc3339_to_atende(params['stop'])
+ duration = (stop_time - start_time) // 1000
+ else:
+ stop_time, duration = None, None
+
+ entries = []
+
+ def add_entry(file, legacy_file=False):
+ if not file:
+ return
+ file = self._proto_relative_url(file)
+ if not legacy_file:
+ file = update_url_query(file, {'startTime': start_time})
+ if stop_time is not None:
+ file = update_url_query(file, {'stopTime': stop_time})
+ stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
+ common_info = {
+ 'url': file,
+ 'duration': duration,
+ }
+ if legacy_file:
+ entries.append({
+ **common_info,
+ 'id': video_id,
+ 'title': title,
+ })
+ else:
+ entries.append({
+ **common_info,
+ '_type': 'url_transparent',
+ 'ie_key': RedCDNLivxIE.ie_key(),
+ 'id': stream_id,
+ 'title': join_nonempty(title, stream_id, delim=' - '),
+ })
+
+ cameras = self._search_json(
+ r'var\s+cameras\s*=', frame, 'camera list', video_id,
+ contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
+ fatal=False) or []
+ for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
+ if camera_file.get('flv'):
+ add_entry(camera_file['flv'])
+ elif camera_file.get('mp4'):
+ # this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
+ add_entry(camera_file['mp4'], legacy_file=True)
+ else:
+ self.report_warning('Unknown camera stream type found')
+
+ if params.get('mig'):
+ add_entry(self._search_regex(r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False))
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(data.get('desc')) or None,
+ 'duration': duration,
+ 'live_status': live_status,
+ 'location': strip_or_none(data.get('location')),
+ }
diff --git a/yt_dlp/extractor/senalcolombia.py b/yt_dlp/extractor/senalcolombia.py
new file mode 100644
index 0000000..b2f354f
--- /dev/null
+++ b/yt_dlp/extractor/senalcolombia.py
@@ -0,0 +1,32 @@
+from .common import InfoExtractor
+from .rtvcplay import RTVCKalturaIE
+
+
+class SenalColombiaLiveIE(InfoExtractor):
+ _WORKING = False
+ _VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?P<id>senal-en-vivo)'
+
+ _TESTS = [{
+ 'url': 'https://www.senalcolombia.tv/senal-en-vivo',
+ 'info_dict': {
+ 'id': 'indexSC',
+ 'title': 're:^Señal Colombia',
+ 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'live_status': 'is_live',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'skip_download': 'Livestream',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ hydration = self._search_json(
+ r'<script[^>]*data-drupal-selector\s*=\s*"[^"]*drupal-settings-json[^"]*"[^>]*>',
+ webpage, 'hydration', display_id)
+
+ return self.url_result(hydration['envivosrc'], RTVCKalturaIE, display_id)
diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py
new file mode 100644
index 0000000..7ff0cf5
--- /dev/null
+++ b/yt_dlp/extractor/senategov.py
@@ -0,0 +1,200 @@
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ parse_qs,
+ unsmuggle_url,
+)
+
+_COMMITTEES = {
+ 'ag': ('76440', 'http://ag-f.akamaihd.net'),
+ 'aging': ('76442', 'http://aging-f.akamaihd.net'),
+ 'approps': ('76441', 'http://approps-f.akamaihd.net'),
+ 'arch': ('', 'http://ussenate-f.akamaihd.net'),
+ 'armed': 
('76445', 'http://armed-f.akamaihd.net'),
+ 'banking': ('76446', 'http://banking-f.akamaihd.net'),
+ 'budget': ('76447', 'http://budget-f.akamaihd.net'),
+ 'cecc': ('76486', 'http://srs-f.akamaihd.net'),
+ 'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
+ 'csce': ('75229', 'http://srs-f.akamaihd.net'),
+ 'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
+ 'energy': ('76448', 'http://energy-f.akamaihd.net'),
+ 'epw': ('76478', 'http://epw-f.akamaihd.net'),
+ 'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
+ 'finance': ('76450', 'http://finance-f.akamaihd.net'),
+ 'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
+ 'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
+ 'help': ('76452', 'http://help-f.akamaihd.net'),
+ 'indian': ('76455', 'http://indian-f.akamaihd.net'),
+ 'intel': ('76456', 'http://intel-f.akamaihd.net'),
+ 'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
+ 'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
+ 'jec': ('76458', 'http://jec-f.akamaihd.net'),
+ 'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
+ 'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
+ 'rules': ('76460', 'http://rules-f.akamaihd.net'),
+ 'saa': ('76489', 'http://srs-f.akamaihd.net'),
+ 'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
+ 'srs': ('75229', 'http://srs-f.akamaihd.net'),
+ 'uscc': ('76487', 'http://srs-f.akamaihd.net'),
+ 'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
+}
+
+
+class SenateISVPIE(InfoExtractor):
+ _IE_NAME = 'senate.gov:isvp'
+ _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
+ _EMBED_REGEX = [r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]"]
+
+ _TESTS = [{
+ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
+ 'info_dict': {
+ 'id': 'judiciary031715',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
+ 'info_dict': {
+ 'id': 'commerce011514',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player'
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
+ # checksum differs each time
+ 'info_dict': {
+ 'id': 'intel090613',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player'
+ }
+ }, {
+ # From http://www.c-span.org/video/?96791-1
+ 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ qs = compat_parse_qs(self._match_valid_url(url).group('qs'))
+ if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
+ raise ExtractorError('Invalid URL', expected=True)
+
+ video_id = re.sub(r'.mp4$', '', qs['filename'][0])
+
+ webpage = self._download_webpage(url, video_id)
+
+ if smuggled_data.get('force_title'):
+ title = smuggled_data['force_title']
+ else:
+ title = self._html_extract_title(webpage)
+ poster = qs.get('poster')
+ thumbnail = poster[0] if poster else None
+
+ video_type = qs['type'][0]
+ committee = video_type if video_type == 'arch' else qs['comm'][0]
+
+ stream_num, domain = _COMMITTEES[committee]
+
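+ # note: two URL shapes are built below: archived footage ('arch') is plain
+ # HLS at <domain>/i/<filename>/master.m3u8, while live streams get both an
+ # Akamai HDS manifest at <domain>/z/<id>_1@<stream_num>/manifest.f4m (which
+ # 404s without the hdcore query parameter) and HLS at
+ # <domain>/i/<id>_1@<stream_num>/master.m3u8.
+ 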
formats = [] + if video_type == 'arch': + filename = video_id if '.' in video_id else video_id + '.mp4' + m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8') + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8') + else: + hdcore_sign = 'hdcore=3.1.0' + url_params = (domain, video_id, stream_num) + f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params + m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params + for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): + # URLs without the extra param induce an 404 error + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.append(entry) + for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): + mobj = re.search(r'(?P(?:-p|-b)).m3u8', entry['url']) + if mobj: + entry['format_id'] += mobj.group('tag') + formats.append(entry) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } + + +class SenateGovIE(InfoExtractor): + _IE_NAME = 'senate.gov' + _VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov' + _TESTS = [{ + 'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'info_dict': { + 'id': 'help090920', + 'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health', + 'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'info_dict': { + 'id': 'appropsA051518', + 'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'title': 'Review of the FY2019 Budget Request for the U.S. Army', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'info_dict': { + 'id': 'banking041521', + 'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization', + 'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + display_id = self._generic_id(url) + webpage = self._download_webpage(url, display_id) + parse_info = parse_qs(self._search_regex( + r'