summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.github/PULL_REQUEST_TEMPLATE.md1
-rw-r--r--.github/banner.svg10
-rw-r--r--.github/workflows/build.yml138
-rw-r--r--.github/workflows/core.yml2
-rw-r--r--.github/workflows/quick-test.yml18
-rw-r--r--.github/workflows/release.yml8
-rw-r--r--.gitignore2
-rw-r--r--.pre-commit-config.yaml14
-rw-r--r--.pre-commit-hatch.yaml9
-rw-r--r--CONTRIBUTING.md77
-rw-r--r--CONTRIBUTORS31
-rw-r--r--Changelog.md216
-rw-r--r--Makefile33
-rw-r--r--README.md95
-rw-r--r--bundle/docker/compose.yml10
-rw-r--r--bundle/docker/static/Dockerfile21
-rwxr-xr-xbundle/docker/static/entrypoint.sh13
-rwxr-xr-xbundle/py2exe.py2
-rw-r--r--devscripts/changelog_override.json43
-rwxr-xr-xdevscripts/install_deps.py46
-rw-r--r--devscripts/make_changelog.py51
-rw-r--r--devscripts/prepare_manpage.py29
-rw-r--r--devscripts/run_tests.bat4
-rwxr-xr-xdevscripts/run_tests.py14
-rwxr-xr-xdevscripts/run_tests.sh4
-rwxr-xr-xdevscripts/tomlparse.py10
-rw-r--r--devscripts/update-version.py4
-rwxr-xr-xdevscripts/update_changelog.py26
-rwxr-xr-xpyinst.py17
-rw-r--r--pyproject.toml170
-rw-r--r--setup.cfg6
-rwxr-xr-xsetup.py36
-rw-r--r--supportedsites.md50
-rw-r--r--test/conftest.py50
-rw-r--r--test/helper.py5
-rw-r--r--test/test_InfoExtractor.py9
-rw-r--r--test/test_YoutubeDL.py4
-rw-r--r--test/test_cookies.py4
-rw-r--r--test/test_http_proxy.py380
-rw-r--r--test/test_networking.py849
-rw-r--r--test/test_socks.py33
-rw-r--r--test/test_traversal.py444
-rw-r--r--test/test_utils.py417
-rw-r--r--test/test_websockets.py126
-rw-r--r--yt_dlp/YoutubeDL.py127
-rw-r--r--yt_dlp/__init__.py42
-rw-r--r--yt_dlp/__pyinstaller/hook-yt_dlp.py6
-rw-r--r--yt_dlp/compat/__init__.py9
-rw-r--r--yt_dlp/cookies.py21
-rw-r--r--yt_dlp/dependencies/__init__.py4
-rw-r--r--yt_dlp/downloader/common.py11
-rw-r--r--yt_dlp/downloader/external.py4
-rw-r--r--yt_dlp/extractor/_extractors.py1073
-rw-r--r--yt_dlp/extractor/abc.py4
-rw-r--r--yt_dlp/extractor/abematv.py5
-rw-r--r--yt_dlp/extractor/acfun.py4
-rw-r--r--yt_dlp/extractor/adn.py4
-rw-r--r--yt_dlp/extractor/adobetv.py4
-rw-r--r--yt_dlp/extractor/afreecatv.py446
-rw-r--r--yt_dlp/extractor/airtv.py2
-rw-r--r--yt_dlp/extractor/allstar.py1
-rw-r--r--yt_dlp/extractor/alphaporno.py4
-rw-r--r--yt_dlp/extractor/alura.py16
-rw-r--r--yt_dlp/extractor/amara.py2
-rw-r--r--yt_dlp/extractor/amp.py2
-rw-r--r--yt_dlp/extractor/anchorfm.py2
-rw-r--r--yt_dlp/extractor/angel.py2
-rw-r--r--yt_dlp/extractor/appleconnect.py5
-rw-r--r--yt_dlp/extractor/appletrailers.py2
-rw-r--r--yt_dlp/extractor/ard.py4
-rw-r--r--yt_dlp/extractor/arnes.py2
-rw-r--r--yt_dlp/extractor/arte.py64
-rw-r--r--yt_dlp/extractor/asobistage.py154
-rw-r--r--yt_dlp/extractor/atvat.py10
-rw-r--r--yt_dlp/extractor/awaan.py2
-rw-r--r--yt_dlp/extractor/aws.py4
-rw-r--r--yt_dlp/extractor/banbye.py4
-rw-r--r--yt_dlp/extractor/bannedvideo.py6
-rw-r--r--yt_dlp/extractor/bbc.py438
-rw-r--r--yt_dlp/extractor/beeg.py1
-rw-r--r--yt_dlp/extractor/bibeltv.py4
-rw-r--r--yt_dlp/extractor/bilibili.py26
-rw-r--r--yt_dlp/extractor/bleacherreport.py2
-rw-r--r--yt_dlp/extractor/blogger.py2
-rw-r--r--yt_dlp/extractor/boosty.py20
-rw-r--r--yt_dlp/extractor/bostonglobe.py1
-rw-r--r--yt_dlp/extractor/box.py37
-rw-r--r--yt_dlp/extractor/boxcast.py6
-rw-r--r--yt_dlp/extractor/brainpop.py2
-rw-r--r--yt_dlp/extractor/brightcove.py4
-rw-r--r--yt_dlp/extractor/brilliantpala.py13
-rw-r--r--yt_dlp/extractor/bundestag.py8
-rw-r--r--yt_dlp/extractor/cableav.py32
-rw-r--r--yt_dlp/extractor/caffeinetv.py74
-rw-r--r--yt_dlp/extractor/canalalpha.py35
-rw-r--r--yt_dlp/extractor/cbc.py108
-rw-r--r--yt_dlp/extractor/cbs.py6
-rw-r--r--yt_dlp/extractor/cda.py68
-rw-r--r--yt_dlp/extractor/ceskatelevize.py2
-rw-r--r--yt_dlp/extractor/cinetecamilano.py1
-rw-r--r--yt_dlp/extractor/clippit.py4
-rw-r--r--yt_dlp/extractor/common.py158
-rw-r--r--yt_dlp/extractor/commonmistakes.py16
-rw-r--r--yt_dlp/extractor/corus.py2
-rw-r--r--yt_dlp/extractor/crackle.py2
-rw-r--r--yt_dlp/extractor/crunchyroll.py290
-rw-r--r--yt_dlp/extractor/cspan.py6
-rw-r--r--yt_dlp/extractor/ctsnews.py2
-rw-r--r--yt_dlp/extractor/dailymail.py2
-rw-r--r--yt_dlp/extractor/damtomo.py2
-rw-r--r--yt_dlp/extractor/dangalplay.py197
-rw-r--r--yt_dlp/extractor/democracynow.py4
-rw-r--r--yt_dlp/extractor/digitalconcerthall.py1
-rw-r--r--yt_dlp/extractor/discoverygo.py2
-rw-r--r--yt_dlp/extractor/disney.py4
-rw-r--r--yt_dlp/extractor/douyutv.py2
-rw-r--r--yt_dlp/extractor/dplay.py45
-rw-r--r--yt_dlp/extractor/dropbox.py4
-rw-r--r--yt_dlp/extractor/drtuber.py2
-rw-r--r--yt_dlp/extractor/dtube.py4
-rw-r--r--yt_dlp/extractor/duboku.py2
-rw-r--r--yt_dlp/extractor/dvtv.py4
-rw-r--r--yt_dlp/extractor/dw.py2
-rw-r--r--yt_dlp/extractor/einthusan.py105
-rw-r--r--yt_dlp/extractor/eplus.py32
-rw-r--r--yt_dlp/extractor/ertgr.py4
-rw-r--r--yt_dlp/extractor/europa.py22
-rw-r--r--yt_dlp/extractor/euscreen.py3
-rw-r--r--yt_dlp/extractor/eyedotv.py4
-rw-r--r--yt_dlp/extractor/facebook.py2
-rw-r--r--yt_dlp/extractor/fancode.py8
-rw-r--r--yt_dlp/extractor/fathom.py54
-rw-r--r--yt_dlp/extractor/faz.py2
-rw-r--r--yt_dlp/extractor/fczenit.py2
-rw-r--r--yt_dlp/extractor/fifa.py1
-rw-r--r--yt_dlp/extractor/filmon.py4
-rw-r--r--yt_dlp/extractor/gab.py2
-rw-r--r--yt_dlp/extractor/gamejolt.py2
-rw-r--r--yt_dlp/extractor/gaskrank.py1
-rw-r--r--yt_dlp/extractor/gbnews.py107
-rw-r--r--yt_dlp/extractor/generic.py18
-rw-r--r--yt_dlp/extractor/gettr.py2
-rw-r--r--yt_dlp/extractor/gigya.py1
-rw-r--r--yt_dlp/extractor/glomex.py2
-rw-r--r--yt_dlp/extractor/go.py10
-rw-r--r--yt_dlp/extractor/godresource.py79
-rw-r--r--yt_dlp/extractor/gofile.py18
-rw-r--r--yt_dlp/extractor/googledrive.py36
-rw-r--r--yt_dlp/extractor/goplay.py4
-rw-r--r--yt_dlp/extractor/gotostage.py9
-rw-r--r--yt_dlp/extractor/hbo.py4
-rw-r--r--yt_dlp/extractor/hearthisat.py46
-rw-r--r--yt_dlp/extractor/hketv.py2
-rw-r--r--yt_dlp/extractor/hrti.py2
-rw-r--r--yt_dlp/extractor/huya.py6
-rw-r--r--yt_dlp/extractor/hytale.py5
-rw-r--r--yt_dlp/extractor/ichinanalive.py2
-rw-r--r--yt_dlp/extractor/imgur.py18
-rw-r--r--yt_dlp/extractor/infoq.py4
-rw-r--r--yt_dlp/extractor/instagram.py5
-rw-r--r--yt_dlp/extractor/iprima.py6
-rw-r--r--yt_dlp/extractor/iqiyi.py10
-rw-r--r--yt_dlp/extractor/itprotv.py3
-rw-r--r--yt_dlp/extractor/itv.py9
-rw-r--r--yt_dlp/extractor/iwara.py4
-rw-r--r--yt_dlp/extractor/jable.py103
-rw-r--r--yt_dlp/extractor/jamendo.py2
-rw-r--r--yt_dlp/extractor/japandiet.py4
-rw-r--r--yt_dlp/extractor/jiocinema.py403
-rw-r--r--yt_dlp/extractor/jiosaavn.py207
-rw-r--r--yt_dlp/extractor/joqrag.py12
-rw-r--r--yt_dlp/extractor/jove.py5
-rw-r--r--yt_dlp/extractor/jstream.py2
-rw-r--r--yt_dlp/extractor/kakao.py2
-rw-r--r--yt_dlp/extractor/kaltura.py8
-rw-r--r--yt_dlp/extractor/kankanews.py4
-rw-r--r--yt_dlp/extractor/kick.py32
-rw-r--r--yt_dlp/extractor/kuwo.py4
-rw-r--r--yt_dlp/extractor/lci.py27
-rw-r--r--yt_dlp/extractor/lcp.py2
-rw-r--r--yt_dlp/extractor/lecture2go.py2
-rw-r--r--yt_dlp/extractor/lecturio.py2
-rw-r--r--yt_dlp/extractor/leeco.py6
-rw-r--r--yt_dlp/extractor/libraryofcongress.py1
-rw-r--r--yt_dlp/extractor/lifenews.py2
-rw-r--r--yt_dlp/extractor/limelight.py2
-rw-r--r--yt_dlp/extractor/linkedin.py6
-rw-r--r--yt_dlp/extractor/loom.py461
-rw-r--r--yt_dlp/extractor/mainstreaming.py3
-rw-r--r--yt_dlp/extractor/manoto.py7
-rw-r--r--yt_dlp/extractor/masters.py1
-rw-r--r--yt_dlp/extractor/medaltv.py2
-rw-r--r--yt_dlp/extractor/mediaklikk.py7
-rw-r--r--yt_dlp/extractor/mediaset.py4
-rw-r--r--yt_dlp/extractor/mediasite.py5
-rw-r--r--yt_dlp/extractor/medici.py182
-rw-r--r--yt_dlp/extractor/microsoftstream.py6
-rw-r--r--yt_dlp/extractor/mildom.py4
-rw-r--r--yt_dlp/extractor/mit.py4
-rw-r--r--yt_dlp/extractor/mixch.py103
-rw-r--r--yt_dlp/extractor/monstercat.py2
-rw-r--r--yt_dlp/extractor/motherless.py4
-rw-r--r--yt_dlp/extractor/moviepilot.py8
-rw-r--r--yt_dlp/extractor/movingimage.py2
-rw-r--r--yt_dlp/extractor/msn.py2
-rw-r--r--yt_dlp/extractor/n1.py2
-rw-r--r--yt_dlp/extractor/naver.py4
-rw-r--r--yt_dlp/extractor/nba.py2
-rw-r--r--yt_dlp/extractor/nbc.py2
-rw-r--r--yt_dlp/extractor/ndr.py2
-rw-r--r--yt_dlp/extractor/neteasemusic.py11
-rw-r--r--yt_dlp/extractor/nfb.py27
-rw-r--r--yt_dlp/extractor/nfhsnetwork.py8
-rw-r--r--yt_dlp/extractor/nhk.py202
-rw-r--r--yt_dlp/extractor/nhl.py2
-rw-r--r--yt_dlp/extractor/niconico.py11
-rw-r--r--yt_dlp/extractor/ninenews.py2
-rw-r--r--yt_dlp/extractor/ninenow.py2
-rw-r--r--yt_dlp/extractor/nitter.py9
-rw-r--r--yt_dlp/extractor/nobelprize.py6
-rw-r--r--yt_dlp/extractor/noz.py6
-rw-r--r--yt_dlp/extractor/nts.py76
-rw-r--r--yt_dlp/extractor/nuevo.py6
-rw-r--r--yt_dlp/extractor/nuvid.py2
-rw-r--r--yt_dlp/extractor/nzherald.py5
-rw-r--r--yt_dlp/extractor/odkmedia.py2
-rw-r--r--yt_dlp/extractor/olympics.py5
-rw-r--r--yt_dlp/extractor/onenewsnz.py6
-rw-r--r--yt_dlp/extractor/onet.py4
-rw-r--r--yt_dlp/extractor/opencast.py2
-rw-r--r--yt_dlp/extractor/openrec.py2
-rw-r--r--yt_dlp/extractor/ora.py1
-rw-r--r--yt_dlp/extractor/orf.py225
-rw-r--r--yt_dlp/extractor/packtpub.py3
-rw-r--r--yt_dlp/extractor/panopto.py20
-rw-r--r--yt_dlp/extractor/paramountplus.py2
-rw-r--r--yt_dlp/extractor/patreon.py219
-rw-r--r--yt_dlp/extractor/pbs.py4
-rw-r--r--yt_dlp/extractor/pearvideo.py2
-rw-r--r--yt_dlp/extractor/peertube.py10
-rw-r--r--yt_dlp/extractor/piapro.py38
-rw-r--r--yt_dlp/extractor/piksel.py21
-rw-r--r--yt_dlp/extractor/pladform.py4
-rw-r--r--yt_dlp/extractor/platzi.py2
-rw-r--r--yt_dlp/extractor/playtvak.py2
-rw-r--r--yt_dlp/extractor/pluralsight.py2
-rw-r--r--yt_dlp/extractor/polsatgo.py6
-rw-r--r--yt_dlp/extractor/porn91.py95
-rw-r--r--yt_dlp/extractor/pornflip.py6
-rw-r--r--yt_dlp/extractor/pornhub.py2
-rw-r--r--yt_dlp/extractor/pornovoisines.py2
-rw-r--r--yt_dlp/extractor/pr0gramm.py6
-rw-r--r--yt_dlp/extractor/prosiebensat1.py10
-rw-r--r--yt_dlp/extractor/prx.py11
-rw-r--r--yt_dlp/extractor/puhutv.py2
-rw-r--r--yt_dlp/extractor/qingting.py1
-rw-r--r--yt_dlp/extractor/qqmusic.py2
-rw-r--r--yt_dlp/extractor/radiocanada.py2
-rw-r--r--yt_dlp/extractor/radiocomercial.py2
-rw-r--r--yt_dlp/extractor/radiokapital.py14
-rw-r--r--yt_dlp/extractor/radiozet.py2
-rw-r--r--yt_dlp/extractor/radlive.py4
-rw-r--r--yt_dlp/extractor/rai.py4
-rw-r--r--yt_dlp/extractor/rbgtum.py2
-rw-r--r--yt_dlp/extractor/rcti.py4
-rw-r--r--yt_dlp/extractor/rds.py4
-rw-r--r--yt_dlp/extractor/redbulltv.py2
-rw-r--r--yt_dlp/extractor/reddit.py63
-rw-r--r--yt_dlp/extractor/redgifs.py2
-rw-r--r--yt_dlp/extractor/redtube.py2
-rw-r--r--yt_dlp/extractor/reuters.py2
-rw-r--r--yt_dlp/extractor/rmcdecouverte.py2
-rw-r--r--yt_dlp/extractor/rokfin.py4
-rw-r--r--yt_dlp/extractor/rte.py2
-rw-r--r--yt_dlp/extractor/rtp.py9
-rw-r--r--yt_dlp/extractor/rtvcplay.py7
-rw-r--r--yt_dlp/extractor/rtvs.py1
-rw-r--r--yt_dlp/extractor/rutube.py2
-rw-r--r--yt_dlp/extractor/rutv.py6
-rw-r--r--yt_dlp/extractor/ruutu.py2
-rw-r--r--yt_dlp/extractor/safari.py1
-rw-r--r--yt_dlp/extractor/scrippsnetworks.py4
-rw-r--r--yt_dlp/extractor/scte.py2
-rw-r--r--yt_dlp/extractor/sejmpl.py14
-rw-r--r--yt_dlp/extractor/sendtonews.py6
-rw-r--r--yt_dlp/extractor/seznamzpravy.py2
-rw-r--r--yt_dlp/extractor/shahid.py2
-rw-r--r--yt_dlp/extractor/sharepoint.py112
-rw-r--r--yt_dlp/extractor/shemaroome.py2
-rw-r--r--yt_dlp/extractor/sixplay.py2
-rw-r--r--yt_dlp/extractor/skynewsarabia.py2
-rw-r--r--yt_dlp/extractor/sohu.py8
-rw-r--r--yt_dlp/extractor/sonyliv.py64
-rw-r--r--yt_dlp/extractor/soundcloud.py209
-rw-r--r--yt_dlp/extractor/sovietscloset.py5
-rw-r--r--yt_dlp/extractor/spankbang.py2
-rw-r--r--yt_dlp/extractor/springboardplatform.py6
-rw-r--r--yt_dlp/extractor/stacommu.py10
-rw-r--r--yt_dlp/extractor/startv.py4
-rw-r--r--yt_dlp/extractor/stitcher.py2
-rw-r--r--yt_dlp/extractor/storyfire.py2
-rw-r--r--yt_dlp/extractor/streamable.py2
-rw-r--r--yt_dlp/extractor/stripchat.py2
-rw-r--r--yt_dlp/extractor/stv.py2
-rw-r--r--yt_dlp/extractor/sunporno.py4
-rw-r--r--yt_dlp/extractor/syfy.py2
-rw-r--r--yt_dlp/extractor/taptap.py275
-rw-r--r--yt_dlp/extractor/tbs.py2
-rw-r--r--yt_dlp/extractor/teachable.py4
-rw-r--r--yt_dlp/extractor/teachertube.py2
-rw-r--r--yt_dlp/extractor/teamcoco.py2
-rw-r--r--yt_dlp/extractor/teamtreehouse.py2
-rw-r--r--yt_dlp/extractor/ted.py5
-rw-r--r--yt_dlp/extractor/tele13.py2
-rw-r--r--yt_dlp/extractor/tele5.py134
-rw-r--r--yt_dlp/extractor/telewebion.py10
-rw-r--r--yt_dlp/extractor/tempo.py2
-rw-r--r--yt_dlp/extractor/tencent.py2
-rw-r--r--yt_dlp/extractor/tenplay.py4
-rw-r--r--yt_dlp/extractor/theguardian.py2
-rw-r--r--yt_dlp/extractor/theintercept.py4
-rw-r--r--yt_dlp/extractor/theplatform.py24
-rw-r--r--yt_dlp/extractor/thisoldhouse.py52
-rw-r--r--yt_dlp/extractor/thisvid.py2
-rw-r--r--yt_dlp/extractor/threeqsdn.py2
-rw-r--r--yt_dlp/extractor/tiktok.py753
-rw-r--r--yt_dlp/extractor/toypics.py3
-rw-r--r--yt_dlp/extractor/triller.py2
-rw-r--r--yt_dlp/extractor/trueid.py4
-rw-r--r--yt_dlp/extractor/tumblr.py2
-rw-r--r--yt_dlp/extractor/turner.py12
-rw-r--r--yt_dlp/extractor/tv2.py4
-rw-r--r--yt_dlp/extractor/tv2hu.py2
-rw-r--r--yt_dlp/extractor/tv5mondeplus.py149
-rw-r--r--yt_dlp/extractor/tva.py44
-rw-r--r--yt_dlp/extractor/tvanouvelles.py2
-rw-r--r--yt_dlp/extractor/tvn24.py2
-rw-r--r--yt_dlp/extractor/tvp.py2
-rw-r--r--yt_dlp/extractor/tvplay.py2
-rw-r--r--yt_dlp/extractor/tvplayer.py2
-rw-r--r--yt_dlp/extractor/tweakers.py2
-rw-r--r--yt_dlp/extractor/twitch.py10
-rw-r--r--yt_dlp/extractor/twitter.py49
-rw-r--r--yt_dlp/extractor/udn.py2
-rw-r--r--yt_dlp/extractor/ukcolumn.py8
-rw-r--r--yt_dlp/extractor/unsupported.py14
-rw-r--r--yt_dlp/extractor/urplay.py4
-rw-r--r--yt_dlp/extractor/usatoday.py2
-rw-r--r--yt_dlp/extractor/ustream.py4
-rw-r--r--yt_dlp/extractor/ustudio.py2
-rw-r--r--yt_dlp/extractor/veo.py1
-rw-r--r--yt_dlp/extractor/vesti.py2
-rw-r--r--yt_dlp/extractor/vevo.py2
-rw-r--r--yt_dlp/extractor/vice.py4
-rw-r--r--yt_dlp/extractor/vidio.py2
-rw-r--r--yt_dlp/extractor/vidlii.py2
-rw-r--r--yt_dlp/extractor/vimeo.py10
-rw-r--r--yt_dlp/extractor/viu.py6
-rw-r--r--yt_dlp/extractor/vk.py35
-rw-r--r--yt_dlp/extractor/voot.py212
-rw-r--r--yt_dlp/extractor/vrt.py62
-rw-r--r--yt_dlp/extractor/walla.py2
-rw-r--r--yt_dlp/extractor/washingtonpost.py1
-rw-r--r--yt_dlp/extractor/wdr.py4
-rw-r--r--yt_dlp/extractor/weibo.py2
-rw-r--r--yt_dlp/extractor/whowatch.py4
-rw-r--r--yt_dlp/extractor/wimtv.py2
-rw-r--r--yt_dlp/extractor/wistia.py4
-rw-r--r--yt_dlp/extractor/wppilot.py10
-rw-r--r--yt_dlp/extractor/wrestleuniverse.py18
-rw-r--r--yt_dlp/extractor/wsj.py2
-rw-r--r--yt_dlp/extractor/xfileshare.py198
-rw-r--r--yt_dlp/extractor/xhamster.py2
-rw-r--r--yt_dlp/extractor/xiaohongshu.py83
-rw-r--r--yt_dlp/extractor/xnxx.py2
-rw-r--r--yt_dlp/extractor/xstream.py4
-rw-r--r--yt_dlp/extractor/xvideos.py73
-rw-r--r--yt_dlp/extractor/xxxymovies.py2
-rw-r--r--yt_dlp/extractor/yandexmusic.py2
-rw-r--r--yt_dlp/extractor/yandexvideo.py10
-rw-r--r--yt_dlp/extractor/youporn.py411
-rw-r--r--yt_dlp/extractor/yourporn.py65
-rw-r--r--yt_dlp/extractor/yourupload.py43
-rw-r--r--yt_dlp/extractor/youtube.py272
-rw-r--r--yt_dlp/extractor/zapiks.py4
-rw-r--r--yt_dlp/extractor/zattoo.py4
-rw-r--r--yt_dlp/extractor/zhihu.py2
-rw-r--r--yt_dlp/extractor/zingmp3.py2
-rw-r--r--yt_dlp/extractor/zype.py2
-rw-r--r--yt_dlp/networking/__init__.py7
-rw-r--r--yt_dlp/networking/_curlcffi.py241
-rw-r--r--yt_dlp/networking/_helper.py4
-rw-r--r--yt_dlp/networking/_requests.py25
-rw-r--r--yt_dlp/networking/_urllib.py6
-rw-r--r--yt_dlp/networking/_websockets.py38
-rw-r--r--yt_dlp/networking/common.py24
-rw-r--r--yt_dlp/networking/impersonate.py141
-rw-r--r--yt_dlp/options.py23
-rw-r--r--yt_dlp/update.py6
-rw-r--r--yt_dlp/utils/_utils.py139
-rw-r--r--yt_dlp/utils/traversal.py37
-rw-r--r--yt_dlp/version.py6
402 files changed, 10178 insertions, 4959 deletions
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index c4d3e81..4deee57 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -28,7 +28,6 @@ Fixes #
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions)
- [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
-- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions)
### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check all of the following options that apply:
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
diff --git a/.github/banner.svg b/.github/banner.svg
index 35dc93e..ea7f9e3 100644
--- a/.github/banner.svg
+++ b/.github/banner.svg
@@ -1,4 +1,4 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" preserveAspectRatio="xMidYMid" width="699.935" height="173.764" viewBox="0 0 717 178">
+<svg xmlns="http://www.w3.org/2000/svg" width="746" height="176" viewBox="0 0 746 176">
<defs>
<style>
.cls-1, .cls-4 {
@@ -24,8 +24,8 @@
}
</style>
</defs>
- <path d="M89.846,166.601 L87.111,166.601 L87.111,172.000 L82.173,172.000 L82.173,153.812 L90.024,153.812 C94.064,153.812 96.773,156.370 96.773,160.242 C96.773,164.158 93.993,166.601 89.846,166.601 ZM88.851,157.755 L87.111,157.755 L87.111,162.764 L88.851,162.764 C90.583,162.764 91.622,161.796 91.622,160.242 C91.622,158.679 90.583,157.755 88.851,157.755 ZM67.898,153.812 L72.835,153.812 L72.835,168.021 L80.189,168.021 L80.189,172.000 L67.898,172.000 L67.898,153.812 ZM56.572,172.000 L49.574,172.000 L49.574,153.812 L56.501,153.812 C62.113,153.812 65.630,157.223 65.630,162.906 C65.630,168.590 62.113,172.000 56.572,172.000 ZM56.252,158.004 L54.511,158.004 L54.511,167.808 L56.394,167.808 C59.094,167.808 60.657,166.707 60.657,162.906 C60.657,159.105 59.094,158.004 56.252,158.004 ZM38.211,162.906 L46.736,162.906 L46.736,166.601 L38.211,166.601 L38.211,162.906 ZM31.253,172.000 L26.387,172.000 L26.387,157.791 L20.916,157.791 L20.916,153.812 L36.724,153.812 L36.724,157.791 L31.253,157.791 L31.253,172.000 ZM12.007,172.000 L7.104,172.000 L7.104,166.281 L0.426,153.812 L5.932,153.812 L9.484,161.201 L9.627,161.201 L13.179,153.812 L18.685,153.812 L12.007,166.281 L12.007,172.000 Z" class="cls-1"/>
- <path d="M714.317,161.947 C714.104,160.988 713.536,159.993 711.689,159.993 C710.019,159.993 708.634,160.846 708.456,162.018 C708.278,163.048 708.918,163.617 710.445,164.007 L712.399,164.505 C714.743,165.109 715.738,166.281 715.418,168.199 C715.028,170.544 712.577,172.284 709.415,172.284 C706.609,172.284 704.904,171.041 704.797,168.732 L706.893,168.235 C707.000,169.691 707.959,170.437 709.664,170.437 C711.617,170.437 713.038,169.478 713.216,168.306 C713.394,167.347 712.861,166.707 711.511,166.387 L709.344,165.855 C706.928,165.251 706.005,164.007 706.325,162.125 C706.715,159.816 709.131,158.182 712.008,158.182 C714.708,158.182 715.951,159.461 716.306,161.414 L714.317,161.947 ZM702.671,165.890 L692.751,165.890 C692.245,169.229 693.648,170.401 696.276,170.401 C697.955,170.401 699.269,169.691 700.042,168.270 L701.960,168.838 C700.974,170.899 698.736,172.284 695.957,172.284 C692.023,172.284 690.069,169.478 690.770,165.286 C691.454,161.095 694.403,158.182 698.088,158.182 C700.939,158.182 703.674,159.922 702.813,165.002 L702.671,165.890 ZM697.768,160.064 C695.477,160.064 693.461,162.143 693.044,164.078 L700.823,164.078 C701.223,161.770 700.051,160.064 697.768,160.064 ZM687.862,172.000 L685.446,172.000 L683.066,166.707 L678.910,172.000 L676.494,172.000 L681.965,165.180 L678.768,158.359 L681.183,158.359 L683.528,163.936 L687.720,158.359 L690.135,158.359 L684.594,165.180 L687.862,172.000 ZM673.886,154.630 C673.886,153.848 674.560,153.209 675.377,153.209 C676.194,153.209 676.869,153.848 676.869,154.630 C676.869,155.411 676.194,156.050 675.377,156.050 C674.560,156.050 673.886,155.411 673.886,154.630 ZM673.513,172.000 L671.417,172.000 L673.690,158.359 L675.786,158.359 L673.513,172.000 ZM670.212,154.914 C668.826,154.914 668.151,155.624 667.903,156.974 L667.672,158.359 L670.745,158.359 L670.460,160.135 L667.379,160.135 L665.416,172.000 L663.320,172.000 L665.301,160.135 L663.107,160.135 L663.391,158.359 L665.603,158.359 L665.914,156.477 C666.269,154.132 668.365,152.960 
670.318,152.960 C671.348,152.960 671.952,153.173 672.237,153.315 L671.348,155.127 C671.135,155.056 670.816,154.914 670.212,154.914 ZM649.225,172.000 L649.580,169.904 L649.332,169.904 C648.745,170.650 647.582,172.284 644.962,172.284 C641.543,172.284 639.616,169.549 640.327,165.215 C641.046,160.917 643.879,158.182 647.324,158.182 C649.989,158.182 650.539,159.816 650.877,160.526 L651.054,160.526 L652.173,153.812 L654.269,153.812 L651.250,172.000 L649.225,172.000 ZM647.182,160.064 C644.527,160.064 642.911,162.302 642.440,165.180 C641.952,168.093 642.849,170.401 645.477,170.401 C647.999,170.401 649.811,168.270 650.326,165.180 C650.832,162.125 649.749,160.064 647.182,160.064 ZM635.980,172.000 L633.884,172.000 L635.305,163.475 C635.660,161.343 634.701,160.064 632.747,160.064 C630.723,160.064 629.053,161.414 628.627,163.794 L627.277,172.000 L625.181,172.000 L627.454,158.359 L629.479,158.359 L629.124,160.491 L629.302,160.491 C630.154,159.105 631.611,158.182 633.671,158.182 C636.406,158.182 638.005,159.851 637.436,163.333 L635.980,172.000 ZM621.349,172.000 L619.253,172.000 L619.573,170.153 L619.466,170.153 C618.898,171.041 617.442,172.320 615.062,172.320 C612.468,172.320 610.657,170.792 611.083,168.128 C611.616,165.002 614.458,164.434 617.051,164.114 C619.573,163.794 620.603,163.865 620.781,162.871 L620.781,162.800 C621.065,161.059 620.354,160.029 618.436,160.029 C616.447,160.029 615.097,161.095 614.458,162.089 L612.611,161.379 C614.067,158.892 616.554,158.182 618.614,158.182 C620.354,158.182 623.551,158.679 622.841,163.013 L621.349,172.000 ZM616.660,165.926 C614.991,166.139 613.428,166.636 613.179,168.235 C612.930,169.691 613.996,170.437 615.665,170.437 C618.152,170.437 619.786,168.767 620.070,167.062 L620.390,165.144 C619.964,165.570 617.548,165.819 616.660,165.926 ZM597.804,159.993 C596.135,159.993 594.749,160.846 594.572,162.018 C594.394,163.048 595.033,163.617 596.561,164.007 L598.515,164.505 C600.859,165.109 601.854,166.281 601.534,168.199 C601.143,170.544 
598.692,172.284 595.531,172.284 C592.724,172.284 591.019,171.041 590.913,168.732 L593.009,168.235 C593.115,169.691 594.074,170.437 595.779,170.437 C597.733,170.437 599.154,169.478 599.332,168.306 C599.509,167.347 598.976,166.707 597.627,166.387 L595.460,165.855 C593.044,165.251 592.121,164.007 592.440,162.125 C592.831,159.816 595.247,158.182 598.124,158.182 C600.824,158.182 602.067,159.461 602.422,161.414 L600.433,161.947 C600.220,160.988 599.651,159.993 597.804,159.993 ZM588.786,165.890 L578.866,165.890 C578.360,169.229 579.763,170.401 582.392,170.401 C584.071,170.401 585.385,169.691 586.157,168.270 L588.076,168.838 C587.090,170.899 584.852,172.284 582.072,172.284 C578.138,172.284 576.185,169.478 576.886,165.286 C577.570,161.095 580.518,158.182 584.204,158.182 C587.054,158.182 589.790,159.922 588.928,165.002 L588.786,165.890 ZM583.884,160.064 C581.593,160.064 579.577,162.143 579.160,164.078 L586.939,164.078 C587.339,161.770 586.166,160.064 583.884,160.064 ZM574.722,160.171 C572.733,160.171 571.046,161.530 570.744,163.368 L569.323,172.000 L567.227,172.000 L569.500,158.359 L571.525,158.359 L571.170,160.420 L571.312,160.420 C572.023,159.070 573.586,158.146 575.255,158.146 C576.001,158.146 576.534,158.324 576.889,158.644 L575.894,160.384 C575.646,160.242 575.255,160.171 574.722,160.171 ZM561.299,172.000 L561.690,169.691 L561.548,169.691 C560.695,171.076 559.132,172.178 557.072,172.178 C554.515,172.178 552.952,170.508 553.520,167.027 L554.976,158.359 L557.072,158.359 L555.651,166.885 C555.332,168.874 556.362,170.153 558.102,170.153 C559.665,170.153 561.797,168.981 562.223,166.423 L563.573,158.359 L565.669,158.359 L563.395,172.000 L561.299,172.000 ZM551.534,160.135 L548.594,160.135 L547.271,168.093 C546.987,169.869 547.839,170.153 548.763,170.153 C549.225,170.153 549.509,170.082 549.686,170.046 L549.829,171.929 C549.509,172.036 548.976,172.178 548.195,172.178 C546.418,172.178 544.713,171.041 545.104,168.661 L546.507,160.135 L544.465,160.135 L544.749,158.359 
L546.800,158.359 L547.342,155.091 L549.438,155.091 L548.896,158.359 L551.818,158.359 L551.534,160.135 ZM539.780,172.000 L537.684,172.000 L538.004,170.153 L537.897,170.153 C537.329,171.041 535.873,172.320 533.493,172.320 C530.900,172.320 529.088,170.792 529.514,168.128 C530.047,165.002 532.889,164.434 535.482,164.114 C538.004,163.794 539.034,163.865 539.212,162.871 L539.212,162.800 C539.496,161.059 538.786,160.029 536.867,160.029 C534.878,160.029 533.528,161.095 532.889,162.089 L531.042,161.379 C532.498,158.892 534.985,158.182 537.045,158.182 C538.786,158.182 541.983,158.679 541.272,163.013 L539.780,172.000 ZM535.091,165.926 C533.422,166.139 531.859,166.636 531.610,168.235 C531.361,169.691 532.427,170.437 534.097,170.437 C536.583,170.437 538.217,168.767 538.501,167.062 L538.821,165.144 C538.395,165.570 535.979,165.819 535.091,165.926 ZM527.316,165.890 L517.397,165.890 C516.891,169.229 518.294,170.401 520.922,170.401 C522.601,170.401 523.915,169.691 524.688,168.270 L526.606,168.838 C525.620,170.899 523.382,172.284 520.603,172.284 C516.669,172.284 514.715,169.478 515.416,165.286 C516.100,161.095 519.049,158.182 522.734,158.182 C525.585,158.182 528.320,159.922 527.459,165.002 L527.316,165.890 ZM522.414,160.064 C520.123,160.064 518.107,162.143 517.690,164.078 L525.469,164.078 C525.869,161.770 524.697,160.064 522.414,160.064 ZM514.282,154.914 C512.897,154.914 512.222,155.624 511.973,156.974 L511.742,158.359 L514.815,158.359 L514.531,160.135 L511.449,160.135 L509.487,172.000 L507.391,172.000 L509.371,160.135 L507.178,160.135 L507.462,158.359 L509.673,158.359 L509.984,156.477 C510.339,154.132 512.435,152.960 514.389,152.960 C515.419,152.960 516.023,153.173 516.307,153.315 L515.419,155.127 C515.206,155.056 514.886,154.914 514.282,154.914 ZM493.506,172.000 L496.525,153.812 L498.621,153.812 L495.601,172.000 L493.506,172.000 ZM489.674,172.000 L487.578,172.000 L487.898,170.153 L487.791,170.153 C487.223,171.041 485.766,172.320 483.386,172.320 C480.793,172.320 478.981,170.792 
479.408,168.128 C479.941,165.002 482.782,164.434 485.375,164.114 C487.898,163.794 488.928,163.865 489.105,162.871 L489.105,162.800 C489.390,161.059 488.679,160.029 486.761,160.029 C484.772,160.029 483.422,161.095 482.782,162.089 L480.935,161.379 C482.392,158.892 484.878,158.182 486.938,158.182 C488.679,158.182 491.876,158.679 491.166,163.013 L489.674,172.000 ZM484.985,165.926 C483.315,166.139 481.752,166.636 481.504,168.235 C481.255,169.691 482.321,170.437 483.990,170.437 C486.477,170.437 488.111,168.767 488.395,167.062 L488.715,165.144 C488.288,165.570 485.873,165.819 484.985,165.926 ZM475.576,172.000 L473.480,172.000 L474.901,163.475 C475.256,161.343 474.297,160.064 472.343,160.064 C470.319,160.064 468.649,161.414 468.223,163.794 L466.873,172.000 L464.777,172.000 L467.051,158.359 L469.075,158.359 L468.720,160.491 L468.898,160.491 C469.750,159.105 471.207,158.182 473.267,158.182 C476.002,158.182 477.601,159.851 477.032,163.333 L475.576,172.000 ZM455.511,172.284 C451.745,172.284 449.703,169.407 450.395,165.109 C451.070,160.917 453.948,158.182 457.571,158.182 C461.336,158.182 463.388,161.059 462.686,165.393 C462.011,169.549 459.134,172.284 455.511,172.284 ZM457.535,160.064 C454.658,160.064 452.873,162.587 452.420,165.393 C451.994,168.057 452.811,170.401 455.546,170.401 C458.423,170.401 460.208,167.924 460.661,165.109 C461.088,162.444 460.271,160.064 457.535,160.064 ZM446.401,154.630 C446.401,153.848 447.076,153.209 447.893,153.209 C448.710,153.209 449.385,153.848 449.385,154.630 C449.385,155.411 448.710,156.050 447.893,156.050 C447.076,156.050 446.401,155.411 446.401,154.630 ZM446.028,172.000 L443.932,172.000 L446.206,158.359 L448.301,158.359 L446.028,172.000 ZM442.763,160.135 L439.823,160.135 L438.500,168.093 C438.216,169.869 439.069,170.153 439.992,170.153 C440.454,170.153 440.738,170.082 440.916,170.046 L441.058,171.929 C440.738,172.036 440.205,172.178 439.424,172.178 C437.648,172.178 435.943,171.041 436.333,168.661 L437.736,160.135 L435.694,160.135 
L435.978,158.359 L438.030,158.359 L438.571,155.091 L440.667,155.091 L440.125,158.359 L443.047,158.359 L442.763,160.135 ZM431.380,154.630 C431.380,153.848 432.055,153.209 432.872,153.209 C433.689,153.209 434.364,153.848 434.364,154.630 C434.364,155.411 433.689,156.050 432.872,156.050 C432.055,156.050 431.380,155.411 431.380,154.630 ZM431.007,172.000 L428.911,172.000 L431.184,158.359 L433.280,158.359 L431.007,172.000 ZM422.770,172.000 L423.126,169.904 L422.877,169.904 C422.291,170.650 421.128,172.284 418.508,172.284 C415.089,172.284 413.162,169.549 413.872,165.215 C414.591,160.917 417.424,158.182 420.870,158.182 C423.534,158.182 424.085,159.816 424.422,160.526 L424.600,160.526 L425.719,153.812 L427.815,153.812 L424.795,172.000 L422.770,172.000 ZM420.728,160.064 C418.073,160.064 416.456,162.302 415.986,165.180 C415.497,168.093 416.394,170.401 419.023,170.401 C421.545,170.401 423.357,168.270 423.872,165.180 C424.378,162.125 423.294,160.064 420.728,160.064 ZM407.216,172.000 L407.572,169.904 L407.323,169.904 C406.737,170.650 405.573,172.284 402.954,172.284 C399.535,172.284 397.608,169.549 398.318,165.215 C399.037,160.917 401.870,158.182 405.316,158.182 C407.980,158.182 408.531,159.816 408.868,160.526 L409.046,160.526 L410.165,153.812 L412.261,153.812 L409.241,172.000 L407.216,172.000 ZM405.174,160.064 C402.519,160.064 400.902,162.302 400.432,165.180 C399.943,168.093 400.840,170.401 403.469,170.401 C405.991,170.401 407.803,168.270 408.318,165.180 C408.824,162.125 407.740,160.064 405.174,160.064 ZM393.971,172.000 L391.875,172.000 L392.195,170.153 L392.088,170.153 C391.520,171.041 390.063,172.320 387.683,172.320 C385.090,172.320 383.279,170.792 383.705,168.128 C384.238,165.002 387.080,164.434 389.673,164.114 C392.195,163.794 393.225,163.865 393.403,162.871 L393.403,162.800 C393.687,161.059 392.976,160.029 391.058,160.029 C389.069,160.029 387.719,161.095 387.080,162.089 L385.232,161.379 C386.689,158.892 389.175,158.182 391.236,158.182 C392.976,158.182 396.173,158.679 
395.463,163.013 L393.971,172.000 ZM389.282,165.926 C387.612,166.139 386.049,166.636 385.801,168.235 C385.552,169.691 386.618,170.437 388.287,170.437 C390.774,170.437 392.408,168.767 392.692,167.062 L393.012,165.144 C392.586,165.570 390.170,165.819 389.282,165.926 ZM372.842,172.000 L370.746,172.000 L372.167,163.475 C372.522,161.308 371.528,160.064 369.574,160.064 C367.513,160.064 365.773,161.414 365.347,163.794 L363.997,172.000 L361.901,172.000 L364.920,153.812 L367.016,153.812 L365.915,160.491 L366.093,160.491 C366.945,159.070 368.330,158.182 370.497,158.182 C373.268,158.182 374.867,159.816 374.298,163.333 L372.842,172.000 ZM360.448,160.135 L357.508,160.135 L356.185,168.093 C355.901,169.869 356.753,170.153 357.677,170.153 C358.139,170.153 358.423,170.082 358.601,170.046 L358.743,171.929 C358.423,172.036 357.890,172.178 357.109,172.178 C355.333,172.178 353.627,171.041 354.018,168.661 L355.421,160.135 L353.379,160.135 L353.663,158.359 L355.714,158.359 L356.256,155.091 L358.352,155.091 L357.810,158.359 L360.732,158.359 L360.448,160.135 ZM349.065,154.630 C349.065,153.848 349.740,153.209 350.557,153.209 C351.374,153.209 352.049,153.848 352.049,154.630 C352.049,155.411 351.374,156.050 350.557,156.050 C349.740,156.050 349.065,155.411 349.065,154.630 ZM348.692,172.000 L346.596,172.000 L348.869,158.359 L350.965,158.359 L348.692,172.000 ZM337.615,172.000 L336.372,161.521 L336.159,161.521 L331.434,172.000 L329.374,172.000 L327.491,158.359 L329.694,158.359 L330.901,168.803 L331.043,168.803 L335.697,158.359 L337.935,158.359 L339.072,168.767 L339.214,168.767 L343.903,158.359 L346.105,158.359 L339.675,172.000 L337.615,172.000 ZM316.983,172.000 L314.319,172.000 L310.296,165.526 L308.600,166.885 L307.747,172.000 L305.651,172.000 L308.671,153.812 L310.767,153.812 L308.999,164.434 L309.239,164.434 L316.237,158.359 L318.830,158.359 L312.090,164.203 L316.983,172.000 ZM303.559,160.171 C301.569,160.171 299.882,161.530 299.580,163.368 L298.159,172.000 L296.063,172.000 L298.337,158.359 
L300.362,158.359 L300.006,160.420 L300.149,160.420 C300.859,159.070 302.422,158.146 304.091,158.146 C304.837,158.146 305.370,158.324 305.726,158.644 L304.731,160.384 C304.482,160.242 304.091,160.171 303.559,160.171 ZM286.797,172.284 C283.031,172.284 280.989,169.407 281.682,165.109 C282.356,160.917 285.234,158.182 288.857,158.182 C292.622,158.182 294.674,161.059 293.972,165.393 C293.297,169.549 290.420,172.284 286.797,172.284 ZM288.822,160.064 C285.944,160.064 284.159,162.587 283.706,165.393 C283.280,168.057 284.097,170.401 286.832,170.401 C289.710,170.401 291.495,167.924 291.948,165.109 C292.374,162.444 291.557,160.064 288.822,160.064 ZM280.512,154.914 C279.126,154.914 278.452,155.624 278.203,156.974 L277.972,158.359 L281.045,158.359 L280.760,160.135 L277.679,160.135 L275.716,172.000 L273.620,172.000 L275.601,160.135 L273.407,160.135 L273.691,158.359 L275.903,158.359 L276.214,156.477 C276.569,154.132 278.665,152.960 280.618,152.960 C281.649,152.960 282.252,153.173 282.537,153.315 L281.649,155.127 C281.435,155.056 281.116,154.914 280.512,154.914 ZM259.735,172.000 L262.755,153.812 L264.851,153.812 L261.831,172.000 L259.735,172.000 ZM253.595,172.000 L253.950,169.904 L253.701,169.904 C253.115,170.650 251.952,172.284 249.332,172.284 C245.913,172.284 243.986,169.549 244.696,165.215 C245.416,160.917 248.249,158.182 251.694,158.182 C254.358,158.182 254.909,159.816 255.246,160.526 L255.424,160.526 L256.543,153.812 L258.639,153.812 L255.619,172.000 L253.595,172.000 ZM251.552,160.064 C248.897,160.064 247.281,162.302 246.810,165.180 C246.321,168.093 247.218,170.401 249.847,170.401 C252.369,170.401 254.181,168.270 254.696,165.180 C255.202,162.125 254.119,160.064 251.552,160.064 ZM233.670,165.180 L233.990,163.226 L241.947,163.226 L241.627,165.180 L233.670,165.180 ZM230.478,165.890 L220.558,165.890 C220.052,169.229 221.455,170.401 224.084,170.401 C225.762,170.401 227.077,169.691 227.849,168.270 L229.768,168.838 C228.782,170.899 226.544,172.284 223.764,172.284 C219.830,172.284 
217.876,169.478 218.578,165.286 C219.262,161.095 222.210,158.182 225.896,158.182 C228.746,158.182 231.482,159.922 230.620,165.002 L230.478,165.890 ZM225.576,160.064 C223.285,160.064 221.269,162.143 220.851,164.078 L228.631,164.078 C229.030,161.770 227.858,160.064 225.576,160.064 ZM209.063,172.284 C206.434,172.284 205.830,170.650 205.475,169.904 L205.226,169.904 L204.871,172.000 L202.846,172.000 L205.866,153.812 L207.962,153.812 L206.860,160.526 L207.038,160.526 C207.606,159.816 208.708,158.182 211.372,158.182 C214.817,158.182 216.736,160.917 216.025,165.215 C215.315,169.549 212.473,172.284 209.063,172.284 ZM210.803,160.064 C208.246,160.064 206.541,162.125 206.043,165.180 C205.546,168.270 206.576,170.401 209.098,170.401 C211.727,170.401 213.432,168.093 213.929,165.180 C214.391,162.302 213.468,160.064 210.803,160.064 ZM196.634,172.000 L197.025,169.691 L196.883,169.691 C196.031,171.076 194.468,172.178 192.407,172.178 C189.850,172.178 188.287,170.508 188.855,167.027 L190.311,158.359 L192.407,158.359 L190.986,166.885 C190.667,168.874 191.697,170.153 193.437,170.153 C195.000,170.153 197.132,168.981 197.558,166.423 L198.908,158.359 L201.004,158.359 L198.730,172.000 L196.634,172.000 ZM186.869,160.135 L183.929,160.135 L182.606,168.093 C182.322,169.869 183.174,170.153 184.098,170.153 C184.560,170.153 184.844,170.082 185.022,170.046 L185.164,171.929 C184.844,172.036 184.311,172.178 183.530,172.178 C181.754,172.178 180.048,171.041 180.439,168.661 L181.842,160.135 L179.800,160.135 L180.084,158.359 L182.135,158.359 L182.677,155.091 L184.773,155.091 L184.231,158.359 L187.153,158.359 L186.869,160.135 ZM173.020,172.000 L173.410,169.691 L173.268,169.691 C172.416,171.076 170.853,172.178 168.792,172.178 C166.235,172.178 164.672,170.508 165.240,167.027 L166.697,158.359 L168.792,158.359 L167.372,166.885 C167.052,168.874 168.082,170.153 169.823,170.153 C171.386,170.153 173.517,168.981 173.943,166.423 L175.293,158.359 L177.389,158.359 L175.115,172.000 L173.020,172.000 ZM155.157,172.284 
C151.391,172.284 149.349,169.407 150.041,165.109 C150.716,160.917 153.594,158.182 157.217,158.182 C160.982,158.182 163.034,161.059 162.332,165.393 C161.657,169.549 158.780,172.284 155.157,172.284 ZM157.181,160.064 C154.304,160.064 152.519,162.587 152.066,165.393 C151.640,168.057 152.457,170.401 155.192,170.401 C158.069,170.401 159.854,167.924 160.307,165.109 C160.734,162.444 159.917,160.064 157.181,160.064 ZM136.511,177.293 C135.801,177.293 135.197,177.151 135.019,177.044 L135.836,175.197 C137.293,175.588 138.207,175.366 139.317,173.350 L140.063,172.000 L137.293,158.359 L139.495,158.359 L141.449,169.229 L141.591,169.229 L147.168,158.359 L149.512,158.359 L141.023,174.202 C139.886,176.298 138.429,177.293 136.511,177.293 ZM123.225,166.849 L115.871,166.849 L113.181,172.000 L110.872,172.000 L120.569,153.812 L122.843,153.812 L126.501,172.000 L124.192,172.000 L123.225,166.849 ZM121.315,156.690 L121.173,156.690 L116.893,164.895 L122.860,164.895 L121.315,156.690 Z" class="cls-2"/>
- <path d="M252.245,116.350 L252.245,102.200 L309.303,102.200 L309.303,116.350 L252.245,116.350 ZM208.254,81.088 L245.342,59.291 L208.254,38.180 L216.242,25.227 L260.862,52.844 L260.862,65.739 L216.413,93.355 L208.254,81.088 Z" class="cls-3"/>
- <path d="M508.108,52.635 C507.921,55.093 507.643,57.527 507.274,59.937 L504.214,78.017 C503.658,81.170 502.754,84.324 501.502,87.475 C500.250,90.628 498.464,93.479 496.147,96.028 C493.829,98.579 491.047,100.503 487.802,101.800 C484.556,103.097 481.311,103.747 478.067,103.747 C476.211,103.747 474.357,103.491 472.504,102.982 C470.648,102.474 469.072,101.615 467.775,100.409 C466.475,99.205 465.410,97.767 464.576,96.098 C463.741,94.429 463.092,92.714 462.629,90.952 L455.953,131.146 L436.482,131.146 L453.310,28.922 L472.921,28.922 L471.391,38.240 C472.504,36.665 473.777,35.180 475.216,33.790 C476.652,32.399 478.228,31.240 479.944,30.313 C481.659,29.387 483.467,28.737 485.369,28.365 C487.268,27.996 489.145,27.809 491.001,27.809 C493.411,27.809 495.706,28.226 497.886,29.061 C500.063,29.895 501.871,31.171 503.310,32.886 C504.746,34.602 505.835,36.549 506.578,38.727 C507.319,40.907 507.806,43.156 508.039,45.472 C508.269,47.791 508.293,50.179 508.108,52.635 ZM487.455,48.184 C486.851,46.841 485.877,45.798 484.534,45.055 C483.189,44.314 481.729,43.942 480.153,43.942 C478.762,43.942 477.393,44.151 476.050,44.568 C474.705,44.986 473.499,45.681 472.434,46.655 C471.367,47.628 470.556,48.765 470.000,50.062 C469.444,51.362 469.027,52.659 468.748,53.956 L465.828,72.037 C465.641,73.149 465.480,74.286 465.341,75.444 C465.202,76.605 465.178,77.717 465.271,78.782 C465.363,79.849 465.526,80.916 465.758,81.981 C465.988,83.048 466.384,84.022 466.940,84.902 C467.497,85.784 468.283,86.456 469.305,86.918 C470.324,87.383 471.391,87.614 472.504,87.614 C474.079,87.614 475.633,87.314 477.163,86.710 C478.693,86.108 480.036,85.204 481.196,83.998 C482.354,82.794 483.235,81.425 483.839,79.895 C484.441,78.365 484.882,76.859 485.160,75.375 L488.081,57.294 C488.359,55.719 488.474,54.143 488.428,52.565 C488.381,50.990 488.057,49.530 487.455,48.184 ZM433.422,86.501 L433.422,102.635 L384.744,102.635 L384.744,86.501 L401.433,86.501 L412.977,16.544 L400.460,16.544 L400.460,0.410 L435.230,0.410 
L420.905,86.501 L433.422,86.501 ZM354.285,102.635 L355.815,93.177 C354.793,94.846 353.542,96.354 352.060,97.697 C350.575,99.042 348.976,100.179 347.261,101.105 C345.545,102.033 343.736,102.704 341.837,103.121 C339.936,103.539 338.058,103.747 336.204,103.747 C333.792,103.747 331.497,103.330 329.320,102.495 C327.140,101.661 325.332,100.388 323.896,98.671 C322.457,96.956 321.368,94.985 320.627,92.760 C319.884,90.535 319.397,88.264 319.167,85.945 C318.934,83.628 318.911,81.264 319.097,78.852 C319.282,76.442 319.606,74.032 320.071,71.620 L322.992,53.539 C323.548,50.295 324.452,47.096 325.704,43.942 C326.955,40.791 328.764,37.962 331.128,35.459 C333.492,32.955 336.274,31.056 339.473,29.756 C342.672,28.459 345.892,27.809 349.139,27.809 C351.086,27.809 352.964,28.066 354.772,28.574 C356.580,29.085 358.155,29.943 359.500,31.147 C360.843,32.353 361.934,33.790 362.769,35.459 C363.603,37.128 364.205,38.844 364.577,40.605 L371.253,0.410 L390.724,0.410 L373.895,102.635 L354.285,102.635 ZM362.004,52.705 C361.956,51.592 361.795,50.503 361.517,49.436 C361.239,48.371 360.822,47.419 360.265,46.585 C359.709,45.751 358.920,45.103 357.901,44.638 C356.880,44.175 355.815,43.942 354.702,43.942 C353.124,43.942 351.573,44.245 350.043,44.846 C348.513,45.451 347.168,46.355 346.010,47.559 C344.849,48.765 343.969,50.110 343.367,51.592 C342.763,53.076 342.370,54.606 342.185,56.182 L339.125,74.262 C338.847,75.840 338.730,77.415 338.777,78.991 C338.823,80.569 339.147,82.029 339.751,83.372 C340.353,84.717 341.326,85.760 342.672,86.501 C344.015,87.244 345.475,87.614 347.053,87.614 C348.443,87.614 349.810,87.405 351.156,86.988 C352.499,86.571 353.705,85.875 354.772,84.902 C355.836,83.928 356.671,82.794 357.275,81.494 C357.877,80.197 358.270,78.900 358.457,77.600 L361.517,59.520 C361.702,58.407 361.841,57.272 361.934,56.112 C362.026,54.954 362.049,53.817 362.004,52.705 Z" class="cls-4"/>
+ <path class="cls-1" d="M0.426,154.812L7.1,167.281V173h4.9v-5.719l6.678-12.469H13.179L9.627,162.2H9.485l-3.552-7.389H0.426Zm20.49,3.979h5.471V173h4.866V158.791h5.47v-3.979H20.916v3.979Zm25.82,5.115H38.211V167.6h8.525v-3.695ZM56.572,173c5.541,0,9.058-3.41,9.058-9.094s-3.517-9.094-9.129-9.094H49.574V173h7Zm-2.06-4.192V159h1.741c2.842,0,4.4,1.1,4.4,4.9s-1.563,4.9-4.263,4.9H54.512ZM67.9,173H80.189v-3.979H72.835V154.812H67.9V173Zm14.275,0h4.938v-5.4h2.735c4.147,0,6.927-2.443,6.927-6.359,0-3.872-2.709-6.43-6.749-6.43h-7.85V173Zm4.938-9.236v-5.009h1.741c1.732,0,2.771.924,2.771,2.487a2.452,2.452,0,0,1-2.771,2.522H87.111Z"/>
+ <path class="cls-2" d="M113.281,173l2.691-5.151h7.353L124.293,173H126.6l-3.659-18.188H120.67L110.972,173h2.309Zm3.712-7.1,4.281-8.206h0.142l1.545,8.206h-5.968Zm27.54-6.537h-3.072l0.231-1.385a2.139,2.139,0,0,1,2.309-2.06,2.987,2.987,0,0,1,1.136.213l0.888-1.812a4.324,4.324,0,0,0-1.918-.355,4.271,4.271,0,0,0-4.405,3.517l-0.31,1.882H137.18l-0.284,1.776h2.194L137.109,173h2.1l1.963-11.865h3.081Zm5.788,13.925a6.444,6.444,0,0,0,6-3.445l-1.918-.569a4.022,4.022,0,0,1-3.765,2.131c-2.629,0-4.032-1.172-3.526-4.511h9.92l0.142-.888c0.861-5.08-1.874-6.82-4.725-6.82-3.685,0-6.633,2.913-7.317,7.1C144.433,170.478,146.387,173.284,150.321,173.284Zm-2.913-8.206a5.27,5.27,0,0,1,4.725-4.014c2.282,0,3.454,1.706,3.055,4.014h-7.78Zm15.8,8.242a5.173,5.173,0,0,0,4.405-2.167h0.106L167.4,173h2.1l1.492-8.987c0.71-4.334-2.487-4.831-4.228-4.831a6.658,6.658,0,0,0-6,3.2l1.847,0.71a4.673,4.673,0,0,1,3.979-2.06c1.918,0,2.628,1.03,2.344,2.771v0.071c-0.177.994-1.208,0.923-3.73,1.243-2.593.32-5.435,0.888-5.967,4.014C158.806,171.792,160.618,173.32,163.211,173.32Zm0.6-1.883c-1.67,0-2.735-.746-2.487-2.2,0.249-1.6,1.812-2.1,3.482-2.309,0.888-.107,3.3-0.356,3.729-0.782l-0.319,1.918A4.289,4.289,0,0,1,163.815,171.437Zm17.721-12.078h-2.921l0.541-3.268h-2.1l-0.541,3.268h-2.052l-0.284,1.776h2.043l-1.4,8.526a2.88,2.88,0,0,0,3.091,3.517,5.026,5.026,0,0,0,1.634-.249l-0.142-1.883a4.017,4.017,0,0,1-.924.107c-0.923,0-1.776-.284-1.492-2.06l1.324-7.958h2.939Zm10.405,8.064a4.329,4.329,0,0,1-4.12,3.73c-1.741,0-2.771-1.279-2.451-3.268l1.421-8.526h-2.1l-1.457,8.668c-0.568,3.481,1,5.151,3.553,5.151a5.185,5.185,0,0,0,4.475-2.487h0.143L191.018,173h2.1l2.273-13.641h-2.1Zm5,5.577h2.1l1.421-8.632a3.953,3.953,0,0,1,3.979-3.2,2.457,2.457,0,0,1,1.172.213l1-1.74a2.365,2.365,0,0,0-1.634-.5,4.5,4.5,0,0,0-3.943,2.274h-0.143l0.356-2.061h-2.025Zm14.846,0.284a6.446,6.446,0,0,0,6-3.445l-1.918-.569a4.023,4.023,0,0,1-3.766,2.131c-2.628,0-4.031-1.172-3.525-4.511h9.92l0.142-.888c0.861-5.08-1.874-6.82-4.725-6.82-3.685,0-6.634,2.913-7.317,7.1C205.9
,170.478,207.857,173.284,211.791,173.284Zm-2.913-8.206a5.268,5.268,0,0,1,4.724-4.014c2.283,0,3.455,1.706,3.055,4.014h-7.779Zm21.1-.852h-7.957l-0.32,1.954h7.957ZM232.208,173h2.1l1.42-8.632a3.959,3.959,0,0,1,3.979-3.2,6.363,6.363,0,0,1,1.279.142l0.355-2.131a10.544,10.544,0,0,0-1.1-.036,4.538,4.538,0,0,0-3.943,2.274h-0.142l0.355-2.061h-2.025Zm9.339,0h2.1l2.274-13.641h-2.1Zm3.961-15.95a1.422,1.422,0,1,0-1.492-1.42A1.464,1.464,0,0,0,245.508,157.05Zm7.476,16.234a6.244,6.244,0,0,0,6.074-4.263h-2.1a3.9,3.9,0,0,1-3.659,2.38c-2.5,0-3.774-2.06-3.232-5.221,0.488-3.091,2.477-5.116,4.937-5.116a2.628,2.628,0,0,1,2.878,2.38h2.1c0.062-2.557-1.821-4.262-4.689-4.262-3.7,0-6.652,2.913-7.318,7.069C247.291,170.336,249.138,173.284,252.984,173.284Zm11.9-8.49a4.229,4.229,0,0,1,4.228-3.73c1.953,0,2.948,1.244,2.593,3.411L270.279,173h2.1l1.456-8.667c0.568-3.517-1.03-5.151-3.8-5.151a4.741,4.741,0,0,0-4.4,2.309h-0.177l1.1-6.679h-2.1L261.434,173h2.095Zm23.865,8.49a6.244,6.244,0,0,0,6.074-4.263h-2.1a3.894,3.894,0,0,1-3.659,2.38c-2.5,0-3.774-2.06-3.232-5.221,0.488-3.091,2.478-5.116,4.937-5.116a2.628,2.628,0,0,1,2.878,2.38h2.1c0.062-2.557-1.821-4.262-4.689-4.262-3.7,0-6.652,2.913-7.318,7.069C283.051,170.336,284.9,173.284,288.744,173.284Zm14.1,0c3.623,0,6.5-2.735,7.175-6.891,0.7-4.334-1.349-7.211-5.115-7.211-3.623,0-6.5,2.735-7.175,6.927C297.034,170.407,299.076,173.284,302.842,173.284Zm0.035-1.883c-2.735,0-3.552-2.344-3.126-5.008,0.453-2.806,2.238-5.329,5.116-5.329,2.735,0,3.552,2.38,3.126,5.045C307.54,168.924,305.755,171.4,302.877,171.4Zm9.231,1.6h2.1l1.421-8.525a3.978,3.978,0,0,1,3.659-3.411,2.092,2.092,0,0,1,2.273,2.594L319.994,173h2.132l1.492-8.881a3.45,3.45,0,0,1,3.516-3.055c1.492,0,2.629.782,2.309,2.807L327.916,173h2.1l1.527-9.129c0.533-3.2-.959-4.689-3.374-4.689a5.191,5.191,0,0,0-4.476,2.309h-0.142a3.076,3.076,0,0,0-3.268-2.309,4.487,4.487,0,0,0-4.05,2.309h-0.178l0.356-2.132h-2.025Zm21.734,0h2.1l1.421-8.525a3.978,3.978,0,0,1,3.659-3.411,2.093,2.093,0,0,1,2.274,2.594L341.728,173h2.131l1.492-8.8
81a3.45,3.45,0,0,1,3.517-3.055c1.491,0,2.628.782,2.309,2.807L349.649,173h2.1l1.527-9.129c0.533-3.2-.959-4.689-3.374-4.689a5.191,5.191,0,0,0-4.476,2.309H345.28a3.075,3.075,0,0,0-3.268-2.309,4.487,4.487,0,0,0-4.05,2.309h-0.177l0.355-2.132h-2.025Zm25.711,0.32a5.173,5.173,0,0,0,4.405-2.167h0.107L363.745,173h2.1l1.492-8.987c0.71-4.334-2.487-4.831-4.228-4.831a6.659,6.659,0,0,0-6,3.2l1.847,0.71a4.674,4.674,0,0,1,3.979-2.06c1.918,0,2.628,1.03,2.344,2.771v0.071c-0.177.994-1.207,0.923-3.73,1.243-2.593.32-5.435,0.888-5.967,4.014C355.148,171.792,356.96,173.32,359.553,173.32Zm0.6-1.883c-1.67,0-2.735-.746-2.487-2.2,0.249-1.6,1.812-2.1,3.482-2.309,0.888-.107,3.3-0.356,3.73-0.782l-0.32,1.918A4.289,4.289,0,0,1,360.157,171.437Zm12.961-6.643a4.154,4.154,0,0,1,4.121-3.73c1.954,0,2.913,1.279,2.558,3.411L378.376,173h2.1l1.457-8.667c0.568-3.482-1.03-5.151-3.765-5.151a4.869,4.869,0,0,0-4.37,2.309h-0.177l0.355-2.132h-2.025L369.673,173h2.095Zm16.336,8.49a5.166,5.166,0,0,0,4.369-2.38h0.249l-0.355,2.1h2.024l3.02-18.188h-2.1l-1.119,6.714h-0.178a3.382,3.382,0,0,0-3.552-2.344c-3.446,0-6.278,2.735-7,7.033C384.108,170.549,386.035,173.284,389.454,173.284Zm0.515-1.883c-2.629,0-3.526-2.308-3.037-5.221,0.47-2.878,2.087-5.116,4.742-5.116,2.567,0,3.65,2.061,3.144,5.116C394.3,169.27,392.491,171.4,389.969,171.4Zm19.16-7.175h-7.957l-0.32,1.954h7.957Zm7.349-9.414h-2.1L411.363,173h2.1ZM417.293,173h2.1l2.273-13.641h-2.1Zm3.961-15.95a1.422,1.422,0,1,0-1.492-1.42A1.464,1.464,0,0,0,421.254,157.05Zm5.415,7.744a4.153,4.153,0,0,1,4.121-3.73c1.953,0,2.912,1.279,2.557,3.411L431.926,173h2.1l1.457-8.667c0.568-3.482-1.031-5.151-3.766-5.151a4.869,4.869,0,0,0-4.369,2.309h-0.178l0.356-2.132H425.5L423.223,173h2.1Zm16.868,8.49a6.446,6.446,0,0,0,6-3.445l-1.918-.569a4.023,4.023,0,0,1-3.766,2.131c-2.629,0-4.032-1.172-3.525-4.511h9.919l0.142-.888c0.862-5.08-1.874-6.82-4.724-6.82-3.686,0-6.634,2.913-7.318,7.1C437.65,170.478,439.6,173.284,443.537,173.284Zm-2.912-8.206a5.268,5.268,0,0,1,4.724-4.014c2.282,0,3.455,1.706,3.055,4.014h-7
.779Zm22.834,8.242a5.171,5.171,0,0,0,4.4-2.167h0.107L467.65,173h2.1l1.492-8.987c0.711-4.334-2.487-4.831-4.227-4.831a6.658,6.658,0,0,0-6,3.2l1.847,0.71a4.671,4.671,0,0,1,3.978-2.06c1.918,0,2.629,1.03,2.345,2.771v0.071c-0.178.994-1.208,0.923-3.73,1.243-2.593.32-5.435,0.888-5.968,4.014C459.054,171.792,460.866,173.32,463.459,173.32Zm0.6-1.883c-1.67,0-2.736-.746-2.487-2.2,0.249-1.6,1.812-2.1,3.481-2.309,0.888-.107,3.3-0.356,3.73-0.782l-0.32,1.918A4.289,4.289,0,0,1,464.063,171.437Zm19.035-4.014a4.329,4.329,0,0,1-4.12,3.73c-1.741,0-2.771-1.279-2.452-3.268l1.421-8.526h-2.1l-1.457,8.668c-0.568,3.481,1,5.151,3.552,5.151a5.185,5.185,0,0,0,4.476-2.487h0.142L482.175,173h2.095l2.274-13.641h-2.1Zm10.155,5.861a5.166,5.166,0,0,0,4.369-2.38h0.249l-0.355,2.1h2.024l3.02-18.188h-2.1l-1.119,6.714h-0.178a3.382,3.382,0,0,0-3.552-2.344c-3.446,0-6.278,2.735-7,7.033C487.907,170.549,489.834,173.284,493.253,173.284Zm0.515-1.883c-2.629,0-3.526-2.308-3.037-5.221,0.471-2.878,2.087-5.116,4.742-5.116,2.567,0,3.65,2.061,3.144,5.116C498.1,169.27,496.29,171.4,493.768,171.4Zm9.888,1.6h2.1l2.273-13.641H505.93Zm3.961-15.95a1.422,1.422,0,1,0-1.492-1.42A1.464,1.464,0,0,0,507.617,157.05Zm7.618,16.234c3.623,0,6.5-2.735,7.175-6.891,0.7-4.334-1.35-7.211-5.115-7.211-3.623,0-6.5,2.735-7.176,6.927C509.427,170.407,511.469,173.284,515.235,173.284Zm0.035-1.883c-2.735,0-3.552-2.344-3.126-5.008,0.453-2.806,2.238-5.329,5.115-5.329,2.736,0,3.553,2.38,3.126,5.045C519.932,168.924,518.147,171.4,515.27,171.4Zm18.822-17.441h-1.918l-9.449,21.775h1.918Zm12.75,5.4H544.5l-5.577,10.87h-0.142l-1.954-10.87h-2.2L537.393,173h2.132ZM548.188,173h2.1l2.274-13.641h-2.1Zm3.96-15.95a1.422,1.422,0,1,0-1.492-1.42A1.464,1.464,0,0,0,552.148,157.05Zm7.121,16.234a5.167,5.167,0,0,0,4.369-2.38h0.249l-0.356,2.1h2.025l3.02-18.188h-2.1l-1.119,6.714h-0.178a3.382,3.382,0,0,0-3.552-2.344c-3.446,0-6.279,2.735-7,7.033C553.922,170.549,555.85,173.284,559.269,173.284Zm0.515-1.883c-2.629,0-3.526-2.308-3.037-5.221,0.47-2.878,2.086-5.116,4.742-5.116,2.566,0,3.65
,2.061,3.144,5.116C564.117,169.27,562.306,171.4,559.784,171.4Zm15.572,1.883a6.446,6.446,0,0,0,6-3.445l-1.918-.569a4.023,4.023,0,0,1-3.766,2.131c-2.628,0-4.032-1.172-3.525-4.511h9.919l0.142-.888c0.862-5.08-1.873-6.82-4.724-6.82-3.686,0-6.634,2.913-7.318,7.1C569.468,170.478,571.421,173.284,575.356,173.284Zm-2.913-8.206a5.268,5.268,0,0,1,4.724-4.014c2.282,0,3.455,1.706,3.055,4.014h-7.779Zm17.472,8.206c3.623,0,6.5-2.735,7.176-6.891,0.7-4.334-1.35-7.211-5.116-7.211-3.623,0-6.5,2.735-7.175,6.927C584.107,170.407,586.15,173.284,589.915,173.284Zm0.036-1.883c-2.736,0-3.553-2.344-3.126-5.008,0.452-2.806,2.237-5.329,5.115-5.329,2.735,0,3.552,2.38,3.126,5.045C594.613,168.924,592.828,171.4,589.951,171.4Zm21.413,1.883a5.166,5.166,0,0,0,4.369-2.38h0.249l-0.356,2.1h2.025l3.02-18.188h-2.1l-1.119,6.714h-0.178a3.382,3.382,0,0,0-3.552-2.344c-3.446,0-6.279,2.735-7,7.033C606.018,170.549,607.945,173.284,611.364,173.284Zm0.515-1.883c-2.629,0-3.526-2.308-3.037-5.221,0.47-2.878,2.087-5.116,4.742-5.116,2.566,0,3.65,2.061,3.144,5.116C616.213,169.27,614.4,171.4,611.879,171.4Zm15.536,1.883c3.623,0,6.5-2.735,7.176-6.891,0.7-4.334-1.35-7.211-5.116-7.211-3.623,0-6.5,2.735-7.175,6.927C621.607,170.407,623.65,173.284,627.415,173.284Zm0.036-1.883c-2.736,0-3.553-2.344-3.126-5.008,0.453-2.806,2.238-5.329,5.115-5.329,2.735,0,3.552,2.38,3.126,5.045C632.113,168.924,630.328,171.4,627.451,171.4Zm11.86,1.6h2.06l4.724-10.479h0.213L647.552,173h2.06l6.43-13.641h-2.2l-4.689,10.408h-0.142l-1.137-10.408h-2.237L640.98,169.8h-0.142l-1.208-10.444h-2.2Zm20.667-8.206a4.154,4.154,0,0,1,4.121-3.73c1.954,0,2.913,1.279,2.557,3.411L665.236,173h2.1l1.457-8.667c0.568-3.482-1.03-5.151-3.766-5.151a4.869,4.869,0,0,0-4.369,2.309h-0.177l0.355-2.132h-2.025L656.533,173h2.1Zm16.3-9.982h-2.1L671.163,173h2.1Zm6.463,18.472c3.624,0,6.5-2.735,7.176-6.891,0.7-4.334-1.35-7.211-5.115-7.211-3.624,0-6.5,2.735-7.176,6.927C676.934,170.407,678.976,173.284,682.741,173.284Zm0.036-1.883c-2.735,0-3.552-2.344-3.126-5.008,0.453-2.806,2.238-5.329,5.115-5.3
29,2.735,0,3.552,2.38,3.126,5.045C687.439,168.924,685.654,171.4,682.777,171.4Zm13.21,1.919a5.171,5.171,0,0,0,4.4-2.167H700.5L700.178,173h2.1l1.492-8.987c0.71-4.334-2.487-4.831-4.227-4.831a6.658,6.658,0,0,0-6,3.2l1.847,0.71a4.671,4.671,0,0,1,3.978-2.06c1.918,0,2.629,1.03,2.345,2.771v0.071c-0.178.994-1.208,0.923-3.73,1.243-2.593.32-5.435,0.888-5.968,4.014C691.582,171.792,693.393,173.32,695.987,173.32Zm0.6-1.883c-1.669,0-2.735-.746-2.486-2.2,0.249-1.6,1.812-2.1,3.481-2.309,0.888-.107,3.3-0.356,3.73-0.782L701,168.062A4.289,4.289,0,0,1,696.59,171.437Zm14.667,1.847a5.166,5.166,0,0,0,4.369-2.38h0.249L715.52,173h2.024l3.02-18.188h-2.1l-1.119,6.714h-0.178a3.382,3.382,0,0,0-3.552-2.344c-3.446,0-6.279,2.735-7,7.033C705.911,170.549,707.838,173.284,711.257,173.284Zm0.515-1.883c-2.629,0-3.526-2.308-3.037-5.221,0.47-2.878,2.087-5.116,4.742-5.116,2.566,0,3.65,2.061,3.144,5.116C716.106,169.27,714.294,171.4,711.772,171.4Zm15.572,1.883a6.446,6.446,0,0,0,6-3.445l-1.918-.569a4.023,4.023,0,0,1-3.766,2.131c-2.628,0-4.031-1.172-3.525-4.511h9.919L734.2,166c0.862-5.08-1.873-6.82-4.724-6.82-3.685,0-6.634,2.913-7.318,7.1C721.456,170.478,723.41,173.284,727.344,173.284Zm-2.913-8.206a5.268,5.268,0,0,1,4.724-4.014c2.283,0,3.455,1.706,3.055,4.014h-7.779ZM736.255,173h2.1l1.421-8.632a3.953,3.953,0,0,1,3.978-3.2,2.462,2.462,0,0,1,1.173.213l0.994-1.74a2.361,2.361,0,0,0-1.634-.5,4.5,4.5,0,0,0-3.943,2.274H740.2l0.355-2.061h-2.024Z"/>
+ <path class="cls-3" d="M223.055,40.6l36.161,20.584L223.055,82.439,231.011,94.4l43.337-26.926V54.9l-43.5-26.926Zm97.968,76.216v-13.8H265.391v13.8h55.632Z"/>
+ <path class="cls-4" d="M344.32,103.5a12.975,12.975,0,0,1-5.424-3.824,17.885,17.885,0,0,1-3.269-5.911,31.164,31.164,0,0,1-1.46-6.815,40.017,40.017,0,0,1-.07-7.093,64.082,64.082,0,0,1,.974-7.232l2.921-18.081a47.854,47.854,0,0,1,2.712-9.6,26.529,26.529,0,0,1,5.424-8.484,23.518,23.518,0,0,1,8.345-5.7,25.528,25.528,0,0,1,9.666-1.947,20.709,20.709,0,0,1,5.633.765,11.788,11.788,0,0,1,4.728,2.573,14.385,14.385,0,0,1,3.269,4.312,21.361,21.361,0,0,1,1.808,5.146L386.253,1.41h19.471L388.9,103.635h-19.61l1.53-9.458a20.581,20.581,0,0,1-3.755,4.52,24.418,24.418,0,0,1-4.8,3.408,20.667,20.667,0,0,1-5.424,2.016,26.173,26.173,0,0,1-5.633.626A19.058,19.058,0,0,1,344.32,103.5Zm21.836-15.507a9.49,9.49,0,0,0,3.616-2.086,10.662,10.662,0,0,0,2.5-3.407,13.948,13.948,0,0,0,1.182-3.894l3.06-18.08q0.277-1.669.417-3.407A28.276,28.276,0,0,0,377,53.7a15.584,15.584,0,0,0-.487-3.268,9.346,9.346,0,0,0-1.252-2.851,5.5,5.5,0,0,0-2.364-1.947,7.659,7.659,0,0,0-3.2-.7,12.622,12.622,0,0,0-4.659.9,11.3,11.3,0,0,0-4.033,2.712,12.64,12.64,0,0,0-2.643,4.033,17.554,17.554,0,0,0-1.182,4.59l-3.06,18.081a23.192,23.192,0,0,0-.348,4.729,11.38,11.38,0,0,0,.974,4.381,6.567,6.567,0,0,0,2.921,3.129,8.9,8.9,0,0,0,4.381,1.113A13.776,13.776,0,0,0,366.156,87.988Zm33.588-.487h16.689l11.544-69.958H415.46V1.41h34.77L435.905,87.5h12.517v16.134H399.744V87.5ZM468.31,29.922h19.611l-1.53,9.319a31.287,31.287,0,0,1,3.825-4.451,21.413,21.413,0,0,1,4.728-3.477,19.049,19.049,0,0,1,5.425-1.947A29.445,29.445,0,0,1,506,28.809a19.093,19.093,0,0,1,6.885,1.252,13.041,13.041,0,0,1,5.424,3.825,18.154,18.154,0,0,1,3.268,5.841,30.317,30.317,0,0,1,1.461,6.746,41.152,41.152,0,0,1,.069,7.163q-0.28,3.687-.834,7.3l-3.06,18.081a48.3,48.3,0,0,1-2.712,9.457,27.978,27.978,0,0,1-5.355,8.553A22.306,22.306,0,0,1,502.8,102.8a26.069,26.069,0,0,1-9.735,1.947,20.92,20.92,0,0,1-5.563-.765,11.135,11.135,0,0,1-4.729-2.573,14.869,14.869,0,0,1-3.2-4.311,26.622,26.622,0,0,1-1.947-5.146l-6.676,40.194H451.482ZM492.163,87.71A11.256,11.256,0,0,0,496.2,85a12.482,12.482,0
,0,0,2.643-4.1,24.835,24.835,0,0,0,1.321-4.52l2.921-18.081a23.24,23.24,0,0,0,.347-4.729,11.453,11.453,0,0,0-.973-4.381,6.61,6.61,0,0,0-2.921-3.129,8.932,8.932,0,0,0-4.381-1.113,13.761,13.761,0,0,0-4.1.626,9.46,9.46,0,0,0-3.616,2.086A9.791,9.791,0,0,0,485,51.062a21.249,21.249,0,0,0-1.252,3.894l-2.92,18.081q-0.28,1.669-.487,3.407a16.193,16.193,0,0,0-.07,3.338,25.184,25.184,0,0,0,.487,3.2A8.979,8.979,0,0,0,481.94,85.9a5.3,5.3,0,0,0,2.365,2.017,7.619,7.619,0,0,0,3.2.7A12.589,12.589,0,0,0,492.163,87.71Z"/>
</svg>
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 4bed5af..55cf3b3 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,6 +12,9 @@ on:
unix:
default: true
type: boolean
+ linux_static:
+ default: true
+ type: boolean
linux_arm:
default: true
type: boolean
@@ -27,9 +30,6 @@ on:
windows32:
default: true
type: boolean
- meta_files:
- default: true
- type: boolean
origin:
required: false
default: ''
@@ -52,7 +52,11 @@ on:
default: stable
type: string
unix:
- description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip
+ description: yt-dlp, yt-dlp.tar.gz
+ default: true
+ type: boolean
+ linux_static:
+ description: yt-dlp_linux
default: true
type: boolean
linux_arm:
@@ -75,10 +79,6 @@ on:
description: yt-dlp_x86.exe
default: true
type: boolean
- meta_files:
- description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
- default: true
- type: boolean
origin:
description: Origin
required: false
@@ -107,60 +107,31 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # Needed for changelog
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- - uses: conda-incubator/setup-miniconda@v3
- with:
- miniforge-variant: Mambaforge
- use-mamba: true
- channels: conda-forge
- auto-update-conda: true
- activate-environment: ""
- auto-activate-base: false
- name: Install Requirements
run: |
sudo apt -y install zip pandoc man sed
- cat > ./requirements.txt << EOF
- python=3.10.*
- brotli-python
- EOF
- python devscripts/install_deps.py --print \
- --exclude brotli --exclude brotlicffi \
- --include secretstorage --include pyinstaller >> ./requirements.txt
- mamba create -n build --file ./requirements.txt
-
- name: Prepare
run: |
python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
+ python devscripts/update_changelog.py -vv
python devscripts/make_lazy_extractors.py
- name: Build Unix platform-independent binary
run: |
make all tar
- - name: Build Unix standalone binary
- shell: bash -l {0}
- run: |
- unset LD_LIBRARY_PATH # Harmful; set by setup-python
- conda activate build
- python -m bundle.pyinstaller --onedir
- (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
- python -m bundle.pyinstaller
- mv ./dist/yt-dlp_linux ./yt-dlp_linux
- mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
-
- name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION
run: |
- binaries=("yt-dlp" "yt-dlp_linux")
- for binary in "${binaries[@]}"; do
- chmod +x ./${binary}
- cp ./${binary} ./${binary}_downgraded
- version="$(./${binary} --version)"
- ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
- downgraded_version="$(./${binary}_downgraded --version)"
- [[ "$version" != "$downgraded_version" ]]
- done
-
+ chmod +x ./yt-dlp
+ cp ./yt-dlp ./yt-dlp_downgraded
+ version="$(./yt-dlp --version)"
+ ./yt-dlp_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+ downgraded_version="$(./yt-dlp_downgraded --version)"
+ [[ "$version" != "$downgraded_version" ]]
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
@@ -168,8 +139,39 @@ jobs:
path: |
yt-dlp
yt-dlp.tar.gz
- yt-dlp_linux
- yt-dlp_linux.zip
+ compression-level: 0
+
+ linux_static:
+ needs: process
+ if: inputs.linux_static
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Build static executable
+ env:
+ channel: ${{ inputs.channel }}
+ origin: ${{ needs.process.outputs.origin }}
+ version: ${{ inputs.version }}
+ run: |
+ mkdir ~/build
+ cd bundle/docker
+ docker compose up --build static
+ sudo chown "${USER}:docker" ~/build/yt-dlp_linux
+ - name: Verify --update-to
+ if: vars.UPDATE_TO_VERIFICATION
+ run: |
+ chmod +x ~/build/yt-dlp_linux
+ cp ~/build/yt-dlp_linux ~/build/yt-dlp_linux_downgraded
+ version="$(~/build/yt-dlp_linux --version)"
+ ~/build/yt-dlp_linux_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+ downgraded_version="$(~/build/yt-dlp_linux_downgraded --version)"
+ [[ "$version" != "$downgraded_version" ]]
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: build-bin-${{ github.job }}
+ path: |
+ ~/build/yt-dlp_linux
compression-level: 0
linux_arm:
@@ -247,6 +249,22 @@ jobs:
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
# We need to ignore wheels otherwise we break universal2 builds
python3 -m pip install -U --user --no-binary :all: -r requirements.txt
+ # We need to fuse our own universal2 wheels for curl_cffi
+ python3 -m pip install -U --user delocate
+ mkdir curl_cffi_whls curl_cffi_universal2
+ python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
+ for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
+ python3 -m pip download \
+ --only-binary=:all: \
+ --platform "${platform}" \
+ --pre -d curl_cffi_whls \
+ -r requirements.txt
+ done
+ python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2
+ python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2
+ cd curl_cffi_universal2
+ for wheel in *cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done
+ python3 -m pip install -U --user *cffi*.whl
- name: Prepare
run: |
@@ -280,7 +298,7 @@ jobs:
macos_legacy:
needs: process
if: inputs.macos_legacy
- runs-on: macos-latest
+ runs-on: macos-12
steps:
- uses: actions/checkout@v4
@@ -342,7 +360,7 @@ jobs:
- name: Install Requirements
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build
- python devscripts/install_deps.py --include py2exe
+ python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
- name: Prepare
@@ -351,12 +369,20 @@ jobs:
python devscripts/make_lazy_extractors.py
- name: Build
run: |
- python -m bundle.py2exe
- Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
python -m bundle.pyinstaller
python -m bundle.pyinstaller --onedir
+ Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
+ - name: Install Requirements (py2exe)
+ run: |
+ python devscripts/install_deps.py --include py2exe
+ - name: Build (py2exe)
+ run: |
+ python -m bundle.py2exe
+ Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
+ Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe
+
- name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION
run: |
@@ -427,10 +453,11 @@ jobs:
compression-level: 0
meta_files:
- if: inputs.meta_files && always() && !cancelled()
+ if: always() && !cancelled()
needs:
- process
- unix
+ - linux_static
- linux_arm
- macos
- macos_legacy
@@ -447,8 +474,9 @@ jobs:
- name: Make SHA2-SUMS files
run: |
cd ./artifact/
- sha256sum * > ../SHA2-256SUMS
- sha512sum * > ../SHA2-512SUMS
+ # make sure SHA sums are also printed to stdout
+ sha256sum * | tee ../SHA2-256SUMS
+ sha512sum * | tee ../SHA2-512SUMS
- name: Make Update spec
run: |
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index ba86306..fdfdebc 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -53,7 +53,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Install test requirements
- run: python3 ./devscripts/install_deps.py --include dev
+ run: python3 ./devscripts/install_deps.py --include test --include curl-cffi
- name: Run tests
continue-on-error: False
run: |
diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml
index 3114e7b..3afb51a 100644
--- a/.github/workflows/quick-test.yml
+++ b/.github/workflows/quick-test.yml
@@ -15,21 +15,25 @@ jobs:
with:
python-version: '3.8'
- name: Install test requirements
- run: python3 ./devscripts/install_deps.py --include dev
+ run: python3 ./devscripts/install_deps.py --include test
- name: Run tests
run: |
python3 -m yt_dlp -v || true
python3 ./devscripts/run_tests.py core
- flake8:
- name: Linter
+ check:
+ name: Code check
if: "!contains(github.event.head_commit.message, 'ci skip all')"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- - name: Install flake8
- run: python3 ./devscripts/install_deps.py -o --include dev
+ with:
+ python-version: '3.8'
+ - name: Install dev dependencies
+ run: python3 ./devscripts/install_deps.py -o --include static-analysis
- name: Make lazy extractors
run: python3 ./devscripts/make_lazy_extractors.py
- - name: Run flake8
- run: flake8 .
+ - name: Run ruff
+ run: ruff check --output-format github .
+ - name: Run autopep8
+ run: autopep8 --diff .
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index fd99cec..32268b3 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -189,13 +189,8 @@ jobs:
if: |
!inputs.prerelease && env.target_repo == github.repository
run: |
+ python devscripts/update_changelog.py -vv
make doc
- sed '/### /Q' Changelog.md >> ./CHANGELOG
- echo '### ${{ env.version }}' >> ./CHANGELOG
- python ./devscripts/make_changelog.py -vv -c >> ./CHANGELOG
- echo >> ./CHANGELOG
- grep -Poz '(?s)### \d+\.\d+\.\d+.+' 'Changelog.md' | head -n -1 >> ./CHANGELOG
- cat ./CHANGELOG > Changelog.md
- name: Push to release
id: push_release
@@ -266,6 +261,7 @@ jobs:
pypi_project: ${{ needs.prepare.outputs.pypi_project }}
run: |
python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}"
+ python devscripts/update_changelog.py -vv
python devscripts/make_lazy_extractors.py
sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml
diff --git a/.gitignore b/.gitignore
index 630c2e0..db322c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,7 +67,7 @@ cookies
# Python
*.pyc
*.pyo
-.pytest_cache
+.*_cache
wine-py2exe/
py2exe.log
build/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..a821eee
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,14 @@
+repos:
+- repo: local
+ hooks:
+ - id: linter
+ name: Apply linter fixes
+ entry: ruff check --fix .
+ language: system
+ types: [python]
+ require_serial: true
+ - id: format
+ name: Apply formatting fixes
+ entry: autopep8 --in-place .
+ language: system
+ types: [python]
diff --git a/.pre-commit-hatch.yaml b/.pre-commit-hatch.yaml
new file mode 100644
index 0000000..fb7d25e
--- /dev/null
+++ b/.pre-commit-hatch.yaml
@@ -0,0 +1,9 @@
+repos:
+- repo: local
+ hooks:
+ - id: fix
+ name: Apply code fixes
+ entry: hatch fmt
+ language: system
+ types: [python]
+ require_serial: true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c94ec55..837b600 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -134,18 +134,53 @@ We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-a
# DEVELOPER INSTRUCTIONS
-Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation).
+Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases), get them via [the other installation methods](README.md#installation) or directly run it using `python -m yt_dlp`.
-To run yt-dlp as a developer, you don't need to build anything either. Simply execute
+`yt-dlp` uses [`hatch`](<https://hatch.pypa.io>) as a project management tool.
+You can easily install it using [`pipx`](<https://pipx.pypa.io>) via `pipx install hatch`, or else via `pip` or your package manager of choice. Make sure you are using at least version `1.10.0`, otherwise some functionality might not work as expected.
- python3 -m yt_dlp
+If you plan on contributing to `yt-dlp`, best practice is to start by running the following command:
-To run all the available core tests, use:
+```shell
+$ hatch run setup
+```
+
+The above command will install a `pre-commit` hook so that required checks/fixes (linting, formatting) will run automatically before each commit. If any code needs to be linted or formatted, then the commit will be blocked and the necessary changes will be made; you should review all edits and re-commit the fixed version.
- python3 devscripts/run_tests.py
+After this you can use `hatch shell` to enable a virtual environment that has `yt-dlp` and its development dependencies installed.
+
+In addition, the following script commands can be used to run simple tasks such as linting or testing (without having to run `hatch shell` first):
+* `hatch fmt`: Automatically fix linter violations and apply required code formatting changes
+ * See `hatch fmt --help` for more info
+* `hatch test`: Run extractor or core tests
+ * See `hatch test --help` for more info
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+While it is strongly recommended to use `hatch` for yt-dlp development, if you are unable to do so, alternatively you can manually create a virtual environment and use the following commands:
+
+```shell
+# To only install development dependencies:
+$ python -m devscripts.install_deps --include dev
+
+# Or, for an editable install plus dev dependencies:
+$ python -m pip install -e ".[default,dev]"
+
+# To set up the pre-commit hook:
+$ pre-commit install
+
+# To be used in place of `hatch test`:
+$ python -m devscripts.run_tests
+
+# To be used in place of `hatch fmt`:
+$ ruff check --fix .
+$ autopep8 --in-place .
+
+# To only check code instead of applying fixes:
+$ ruff check .
+$ autopep8 --diff .
+```
+
If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile).
@@ -165,12 +200,16 @@ After you have ensured this site is distributing its content legally, you can fo
1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork)
1. Check out the source code with:
- git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git
+ ```shell
+ $ git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git
+ ```
1. Start a new git branch with
- cd yt-dlp
- git checkout -b yourextractor
+ ```shell
+ $ cd yt-dlp
+ $ git checkout -b yourextractor
+ ```
1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`:
@@ -217,21 +256,27 @@ After you have ensured this site is distributing its content legally, you can fo
# TODO more properties (see yt_dlp/extractor/common.py)
}
```
-1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
-1. Run `python3 devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`
+1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. Also note that when adding a parenthesized import group, the last import in the group must have a trailing comma in order for this formatting to be respected by our code formatter.
+1. Run `hatch test YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`.
1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
-1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions), passes [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started) code checks and is properly formatted:
+
+ ```shell
+ $ hatch fmt --check
+ ```
- $ flake8 yt_dlp/extractor/yourextractor.py
+ You can use `hatch fmt` to automatically fix problems.
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
- $ git add yt_dlp/extractor/_extractors.py
- $ git add yt_dlp/extractor/yourextractor.py
- $ git commit -m '[yourextractor] Add extractor'
- $ git push origin yourextractor
+ ```shell
+ $ git add yt_dlp/extractor/_extractors.py
+ $ git add yt_dlp/extractor/yourextractor.py
+ $ git commit -m '[yourextractor] Add extractor'
+ $ git push origin yourextractor
+ ```
1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 6ee3baa..e0d1668 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -600,3 +600,34 @@ xpadev-net
Xpl0itU
YoshichikaAAA
zhijinwuu
+alb
+hruzgar
+kasper93
+leoheitmannruiz
+luiso1979
+nipotan
+Offert4324
+sta1us
+Tomoka1
+trwstin
+alexhuot1
+clienthax
+DaPotato69
+emqi
+hugohaa
+imanoreotwe
+JakeFinley96
+lostfictions
+minamotorin
+ocococococ
+Podiumnoche
+RasmusAntons
+roeniss
+shoxie007
+Szpachlarz
+The-MAGI
+TuxCoder
+voidful
+vtexier
+WyohKnott
+trueauracoral
diff --git a/Changelog.md b/Changelog.md
index 45a9cef..2673302 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,6 +4,222 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
+### 2024.05.27
+
+#### Extractor changes
+- [Fix parsing of base URL in SMIL manifest](https://github.com/yt-dlp/yt-dlp/commit/26603d0b34898818992bee4598e0607c07059511) ([#9225](https://github.com/yt-dlp/yt-dlp/issues/9225)) by [seproDev](https://github.com/seproDev)
+- **peertube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/12b248ce60be1aa1362edd839d915bba70dbee4b) ([#10044](https://github.com/yt-dlp/yt-dlp/issues/10044)) by [bashonly](https://github.com/bashonly), [trueauracoral](https://github.com/trueauracoral)
+- **piksel**: [Update domain](https://github.com/yt-dlp/yt-dlp/commit/ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b) ([#9223](https://github.com/yt-dlp/yt-dlp/issues/9223)) by [seproDev](https://github.com/seproDev)
+- **tiktok**: user: [Fix extraction loop](https://github.com/yt-dlp/yt-dlp/commit/c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079) ([#10035](https://github.com/yt-dlp/yt-dlp/issues/10035)) by [bashonly](https://github.com/bashonly)
+
+#### Misc. changes
+- **cleanup**: Miscellaneous: [5e3e19c](https://github.com/yt-dlp/yt-dlp/commit/5e3e19c93c52830da98d9d1ed84ea7a559efefbd) by [bashonly](https://github.com/bashonly)
+
+### 2024.05.26
+
+#### Core changes
+- [Better warning when requested subs format not found](https://github.com/yt-dlp/yt-dlp/commit/7e4259dff0b681a3f0e8a930799ce0394328c86e) ([#9873](https://github.com/yt-dlp/yt-dlp/issues/9873)) by [DaPotato69](https://github.com/DaPotato69)
+- [Merged with youtube-dl a08f2b7](https://github.com/yt-dlp/yt-dlp/commit/a4da9db87b6486b270c15dfa07ab5bfedc83f6bd) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+- [Warn if lack of ffmpeg alters format selection](https://github.com/yt-dlp/yt-dlp/commit/96da9525043f78aca4544d01761b13b2140e9ae6) ([#9805](https://github.com/yt-dlp/yt-dlp/issues/9805)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- **cookies**
+ - [Add `--cookies-from-browser` support for Whale](https://github.com/yt-dlp/yt-dlp/commit/dd9ad97b1fbdd36c086b8ba82328a4d954f78f8e) ([#9649](https://github.com/yt-dlp/yt-dlp/issues/9649)) by [roeniss](https://github.com/roeniss)
+ - [Get chrome session cookies with `--cookies-from-browser`](https://github.com/yt-dlp/yt-dlp/commit/f1f158976e38d38a260762accafe7bbe6d451151) ([#9747](https://github.com/yt-dlp/yt-dlp/issues/9747)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+- **windows**: [Improve shell quoting and tests](https://github.com/yt-dlp/yt-dlp/commit/64766459e37451b665c1464073c28361fbcf1c25) ([#9802](https://github.com/yt-dlp/yt-dlp/issues/9802)) by [Grub4K](https://github.com/Grub4K) (With fixes in [7e26bd5](https://github.com/yt-dlp/yt-dlp/commit/7e26bd53f9c5893518fde81dfd0079ec08dd841e))
+
+#### Extractor changes
+- [Add POST data hash to `--write-pages` filenames](https://github.com/yt-dlp/yt-dlp/commit/61b17437dc14a1c7e90ff48a6198df77828c6df4) ([#9879](https://github.com/yt-dlp/yt-dlp/issues/9879)) by [minamotorin](https://github.com/minamotorin) (With fixes in [c999bac](https://github.com/yt-dlp/yt-dlp/commit/c999bac02c5a4f755b2a82488a975e91c988ffd8) by [bashonly](https://github.com/bashonly))
+- [Make `_search_nextjs_data` non fatal](https://github.com/yt-dlp/yt-dlp/commit/3ee1194288981c4f2c4abd8315326de0c424d2ce) ([#8937](https://github.com/yt-dlp/yt-dlp/issues/8937)) by [Grub4K](https://github.com/Grub4K)
+- **afreecatv**: live: [Add `cdn` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/315b3544296bb83012e20ee3af9d3cbf5600dd1c) ([#9666](https://github.com/yt-dlp/yt-dlp/issues/9666)) by [bashonly](https://github.com/bashonly)
+- **alura**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc2879ecb05aaad36869609d154e4321362c1f63) ([#9658](https://github.com/yt-dlp/yt-dlp/issues/9658)) by [hugohaa](https://github.com/hugohaa)
+- **artetv**: [Label forced subtitles](https://github.com/yt-dlp/yt-dlp/commit/7b5674949fd03a33b47b67b31d56a5adf1c48c91) ([#9945](https://github.com/yt-dlp/yt-dlp/issues/9945)) by [vtexier](https://github.com/vtexier)
+- **bbc**: [Fix and extend extraction](https://github.com/yt-dlp/yt-dlp/commit/7975ddf245d22af034d5b983eeb1c5ec6c2ce053) ([#9705](https://github.com/yt-dlp/yt-dlp/issues/9705)) by [dirkf](https://github.com/dirkf), [kylegustavo](https://github.com/kylegustavo), [pukkandan](https://github.com/pukkandan)
+- **bilibili**: [Fix `--geo-verification-proxy` support](https://github.com/yt-dlp/yt-dlp/commit/2338827072dacab0f15348b70aec8685feefc8d1) ([#9817](https://github.com/yt-dlp/yt-dlp/issues/9817)) by [fireattack](https://github.com/fireattack)
+- **bilibilispacevideo**
+ - [Better error message](https://github.com/yt-dlp/yt-dlp/commit/06d52c87314e0bbc16c43c405090843885577b88) ([#9839](https://github.com/yt-dlp/yt-dlp/issues/9839)) by [fireattack](https://github.com/fireattack)
+ - [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4cc99d7b6cce8b39506ead01407445d576b63ee4) ([#9905](https://github.com/yt-dlp/yt-dlp/issues/9905)) by [c-basalt](https://github.com/c-basalt)
+- **boosty**: [Add cookies support](https://github.com/yt-dlp/yt-dlp/commit/145dc6f6563e80d2da1b3e9aea2ffa795b71622c) ([#9522](https://github.com/yt-dlp/yt-dlp/issues/9522)) by [RasmusAntons](https://github.com/RasmusAntons)
+- **brilliantpala**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/eead3bbc01f6529862bdad1f0b2adeabda4f006e) ([#9788](https://github.com/yt-dlp/yt-dlp/issues/9788)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **canalalpha**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/00a9f2e1f7fa69499221f2e8dd73a08efeef79bc) ([#9675](https://github.com/yt-dlp/yt-dlp/issues/9675)) by [kclauhk](https://github.com/kclauhk)
+- **cbc.ca**: player: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8bf48f3a8fa29587e7c73ef5a7710385a5ea725) ([#9866](https://github.com/yt-dlp/yt-dlp/issues/9866)) by [carusocr](https://github.com/carusocr)
+- **cda**: [Fix age-gated web extraction](https://github.com/yt-dlp/yt-dlp/commit/6d8a53d870ff6795f509085bfbf3981417999038) ([#9939](https://github.com/yt-dlp/yt-dlp/issues/9939)) by [dirkf](https://github.com/dirkf), [emqi](https://github.com/emqi), [Podiumnoche](https://github.com/Podiumnoche), [Szpachlarz](https://github.com/Szpachlarz)
+- **commonmistakes**: [Raise error on blob URLs](https://github.com/yt-dlp/yt-dlp/commit/98d71d8c5e5dab08b561ee6f137e968d2a004262) ([#9897](https://github.com/yt-dlp/yt-dlp/issues/9897)) by [seproDev](https://github.com/seproDev)
+- **crunchyroll**
+ - [Always make metadata available](https://github.com/yt-dlp/yt-dlp/commit/cb2fb4a643949322adba561ca73bcba3221ec0c5) ([#9772](https://github.com/yt-dlp/yt-dlp/issues/9772)) by [bashonly](https://github.com/bashonly)
+ - [Fix auth and remove cookies support](https://github.com/yt-dlp/yt-dlp/commit/ff38a011d57b763f3a69bebd25a5dc9044a717ce) ([#9749](https://github.com/yt-dlp/yt-dlp/issues/9749)) by [bashonly](https://github.com/bashonly)
+ - [Fix stream extraction](https://github.com/yt-dlp/yt-dlp/commit/f2816634e3be88fe158b342ee33918de3c272a54) ([#10005](https://github.com/yt-dlp/yt-dlp/issues/10005)) by [bashonly](https://github.com/bashonly)
+ - [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/5904853ae5788509fdc4892cb7ecdfa9ae7f78e6) ([#9857](https://github.com/yt-dlp/yt-dlp/issues/9857)) by [bashonly](https://github.com/bashonly)
+- **dangalplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0d067e77c3f5527946fb0c22ee1c7011994cba40) ([#10021](https://github.com/yt-dlp/yt-dlp/issues/10021)) by [bashonly](https://github.com/bashonly)
+- **discoveryplus**: [Fix dmax.de and related extractors](https://github.com/yt-dlp/yt-dlp/commit/90d2da311bbb5dc06f385ee428c7e4590936e995) ([#10020](https://github.com/yt-dlp/yt-dlp/issues/10020)) by [bashonly](https://github.com/bashonly)
+- **eplus**: [Handle URLs without videos](https://github.com/yt-dlp/yt-dlp/commit/351dc0bc334c4e1b5f00c152818c3ec0ed71f788) ([#9855](https://github.com/yt-dlp/yt-dlp/issues/9855)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **europarlwebstream**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/800a43983e5fb719526ce4cb3956216085c63268) ([#9647](https://github.com/yt-dlp/yt-dlp/issues/9647)) by [seproDev](https://github.com/seproDev), [voidful](https://github.com/voidful)
+- **facebook**: [Fix DASH formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e3b42d8b1b8bcfff7ba146c19fc3f6f6ba843cea) ([#9734](https://github.com/yt-dlp/yt-dlp/issues/9734)) by [bashonly](https://github.com/bashonly)
+- **godresource**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65e709d23530959075816e966c42179ad46e8e3b) ([#9629](https://github.com/yt-dlp/yt-dlp/issues/9629)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **googledrive**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/85ec2a337ac325cf6427cbafd56f0a034c1a5218) ([#9908](https://github.com/yt-dlp/yt-dlp/issues/9908)) by [WyohKnott](https://github.com/WyohKnott)
+- **hearthisat**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5bbfdb7c999b22f1aeca0c3489c167d6eb73013b) ([#9949](https://github.com/yt-dlp/yt-dlp/issues/9949)) by [bohwaz](https://github.com/bohwaz), [seproDev](https://github.com/seproDev)
+- **hytale**: [Use `CloudflareStreamIE` explicitly](https://github.com/yt-dlp/yt-dlp/commit/31b417e1d1ccc67d5c027bf8878f483dc34cb118) ([#9672](https://github.com/yt-dlp/yt-dlp/issues/9672)) by [llamasblade](https://github.com/llamasblade)
+- **instagram**: [Support `/reels/` URLs](https://github.com/yt-dlp/yt-dlp/commit/06cb0638392b607b47d3c2ac48eb2ebecb0f060d) ([#9539](https://github.com/yt-dlp/yt-dlp/issues/9539)) by [amir16yp](https://github.com/amir16yp)
+- **jiocinema**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1463945ae5fb05986a0bd1aa02e41d1a08d93a02) ([#10026](https://github.com/yt-dlp/yt-dlp/issues/10026)) by [bashonly](https://github.com/bashonly)
+- **jiosaavn**: [Extract via API and fix playlists](https://github.com/yt-dlp/yt-dlp/commit/0c21c53885cf03f4040467ae8c44d7ff51016116) ([#9656](https://github.com/yt-dlp/yt-dlp/issues/9656)) by [bashonly](https://github.com/bashonly)
+- **lci**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a2eebc76770fca91ffabeff658d560f716fec80) ([#10025](https://github.com/yt-dlp/yt-dlp/issues/10025)) by [ocococococ](https://github.com/ocococococ)
+- **mixch**: [Extract comments](https://github.com/yt-dlp/yt-dlp/commit/b38018b781b062d5169d104ab430489aef8e7f1e) ([#9860](https://github.com/yt-dlp/yt-dlp/issues/9860)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **moviepilot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/296df0da1d38a44d34c99b60a18066c301774537) ([#9366](https://github.com/yt-dlp/yt-dlp/issues/9366)) by [panatexxa](https://github.com/panatexxa)
+- **netease**: program: [Improve `--no-playlist` message](https://github.com/yt-dlp/yt-dlp/commit/73f12119b52d98281804b0c072b2ed6aa841ec88) ([#9488](https://github.com/yt-dlp/yt-dlp/issues/9488)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **nfb**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0a1a8e3005f66c44bf67633dccd4df19c3fccd1a) ([#9650](https://github.com/yt-dlp/yt-dlp/issues/9650)) by [rrgomes](https://github.com/rrgomes)
+- **ntslive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be7db1a5a8c483726c511c30ea4689cbb8b27962) ([#9641](https://github.com/yt-dlp/yt-dlp/issues/9641)) by [lostfictions](https://github.com/lostfictions)
+- **orf**: on: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9) ([#9677](https://github.com/yt-dlp/yt-dlp/issues/9677)) by [TuxCoder](https://github.com/TuxCoder)
+- **orftvthek**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/3779f2a307ba3ef1d28e107cdd71b221dfb4eb36) ([#10011](https://github.com/yt-dlp/yt-dlp/issues/10011)) by [seproDev](https://github.com/seproDev)
+- **patreon**
+ - [Extract multiple embeds](https://github.com/yt-dlp/yt-dlp/commit/036e0d92c6052465673d459678322ea03e61483d) ([#9850](https://github.com/yt-dlp/yt-dlp/issues/9850)) by [bashonly](https://github.com/bashonly)
+ - [Fix Vimeo embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c9ce57d9bf51541da2381d99bc096a9d0ddf1f27) ([#9712](https://github.com/yt-dlp/yt-dlp/issues/9712)) by [bashonly](https://github.com/bashonly)
+- **piapro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3ba8de62d61d782256f5c1e9939a0762039657de) ([#9311](https://github.com/yt-dlp/yt-dlp/issues/9311)) by [FinnRG](https://github.com/FinnRG), [seproDev](https://github.com/seproDev)
+- **pornhub**: [Fix login by email address](https://github.com/yt-dlp/yt-dlp/commit/518c1afc1592cae3e4eb39dc646b5bc059333112) ([#9914](https://github.com/yt-dlp/yt-dlp/issues/9914)) by [feederbox826](https://github.com/feederbox826)
+- **qub**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b54cccdcb892bca3e55993480d8b86f1c7e6da6) ([#7019](https://github.com/yt-dlp/yt-dlp/issues/7019)) by [alexhuot1](https://github.com/alexhuot1), [dirkf](https://github.com/dirkf)
+- **reddit**: [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/82f4f4444e26daf35b7302c406fe2312f78f619e) ([#10006](https://github.com/yt-dlp/yt-dlp/issues/10006)) by [kclauhk](https://github.com/kclauhk)
+- **soundcloud**
+ - [Add `formats` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/beaf832c7a9d57833f365ce18f6115b88071b296) ([#10004](https://github.com/yt-dlp/yt-dlp/issues/10004)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+ - [Extract `genres`](https://github.com/yt-dlp/yt-dlp/commit/231c2eacc41b06b65c63edf94c0d04768a5da607) ([#9821](https://github.com/yt-dlp/yt-dlp/issues/9821)) by [bashonly](https://github.com/bashonly)
+- **taptap**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/63b569bc5e7d461753637a20ad84a575adee4c0a) ([#9776](https://github.com/yt-dlp/yt-dlp/issues/9776)) by [c-basalt](https://github.com/c-basalt)
+- **tele5**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d) ([#10024](https://github.com/yt-dlp/yt-dlp/issues/10024)) by [bashonly](https://github.com/bashonly)
+- **theatercomplextown**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/8056a3026ed6ec6a6d0ed56fdd7ebcd16e928341) ([#9754](https://github.com/yt-dlp/yt-dlp/issues/9754)) by [bashonly](https://github.com/bashonly)
+- **tiktok**
+ - [Add `device_id` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/3584b8390bd21c0393a3079eeee71aed56a1c1d8) ([#9951](https://github.com/yt-dlp/yt-dlp/issues/9951)) by [bashonly](https://github.com/bashonly)
+ - [Extract all web formats](https://github.com/yt-dlp/yt-dlp/commit/4ccd73fea0f6f4be343e1ec7f22dd03799addcf8) ([#9960](https://github.com/yt-dlp/yt-dlp/issues/9960)) by [bashonly](https://github.com/bashonly)
+ - [Extract via mobile API only if extractor-arg is passed](https://github.com/yt-dlp/yt-dlp/commit/41ba4a808b597a3afed78c89675a30deb6844450) ([#9938](https://github.com/yt-dlp/yt-dlp/issues/9938)) by [bashonly](https://github.com/bashonly)
+ - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/eef1e9f44ff14c5e65b759bb1eafa3946cdaf719) ([#9961](https://github.com/yt-dlp/yt-dlp/issues/9961)) by [bashonly](https://github.com/bashonly)
+ - collection: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/119d41f27061d220d276a2d38cfc8d873437452a) ([#9986](https://github.com/yt-dlp/yt-dlp/issues/9986)) by [bashonly](https://github.com/bashonly), [imanoreotwe](https://github.com/imanoreotwe)
+ - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/347f13dd9bccc2b4db3ea25689410d45d8370ed4) ([#9661](https://github.com/yt-dlp/yt-dlp/issues/9661)) by [bashonly](https://github.com/bashonly)
+- **tv5monde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6db96268c521e945d42649607db1574f5d92e082) ([#9143](https://github.com/yt-dlp/yt-dlp/issues/9143)) by [alard](https://github.com/alard), [seproDev](https://github.com/seproDev)
+- **twitter**
+ - [Fix auth for x.com migration](https://github.com/yt-dlp/yt-dlp/commit/3e35aa32c74bc108375be8c8b6b3bfc90dfff1b4) ([#9952](https://github.com/yt-dlp/yt-dlp/issues/9952)) by [bashonly](https://github.com/bashonly)
+ - [Support x.com URLs](https://github.com/yt-dlp/yt-dlp/commit/4813173e4544f125d6f2afc31e600727d761b8dd) ([#9926](https://github.com/yt-dlp/yt-dlp/issues/9926)) by [bashonly](https://github.com/bashonly)
+- **vk**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/df5c9e733aaba703cf285c0372b6d61629330c82) ([#9885](https://github.com/yt-dlp/yt-dlp/issues/9885)) by [seproDev](https://github.com/seproDev)
+- **wrestleuniverse**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/c4853655cb9a793129280806af643de43c48f4d5) ([#9800](https://github.com/yt-dlp/yt-dlp/issues/9800)) by [bashonly](https://github.com/bashonly)
+- **xiaohongshu**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2e9031605d87c469be9ce98dbbdf4960b727338) ([#9646](https://github.com/yt-dlp/yt-dlp/issues/9646)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **xvideos**: quickies: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b207d26f83fb8ab0ce56df74dff43ff583a3264f) ([#9834](https://github.com/yt-dlp/yt-dlp/issues/9834)) by [JakeFinley96](https://github.com/JakeFinley96)
+- **youporn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/351368cb9a6731b886a58f5a10fd6b302bbe47be) ([#8827](https://github.com/yt-dlp/yt-dlp/issues/8827)) by [The-MAGI](https://github.com/The-MAGI)
+- **youtube**
+ - [Add `mediaconnect` client](https://github.com/yt-dlp/yt-dlp/commit/cf212d0a331aba05c32117573f760cdf3af8c62f) ([#9546](https://github.com/yt-dlp/yt-dlp/issues/9546)) by [clienthax](https://github.com/clienthax)
+ - [Extract upload timestamp if available](https://github.com/yt-dlp/yt-dlp/commit/96a134dea6397a5f2131947c427aac52c8b4e677) ([#9856](https://github.com/yt-dlp/yt-dlp/issues/9856)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/8e15177b4113c355989881e4e030f695a9b59c3a) ([#9775](https://github.com/yt-dlp/yt-dlp/issues/9775)) by [bbilly1](https://github.com/bbilly1), [jakeogh](https://github.com/jakeogh), [minamotorin](https://github.com/minamotorin), [shoxie007](https://github.com/shoxie007)
+ - [Remove `android` from default clients](https://github.com/yt-dlp/yt-dlp/commit/12d8ea8246fa901de302ff5cc748caddadc82f41) ([#9553](https://github.com/yt-dlp/yt-dlp/issues/9553)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
+- **zenyandex**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c4b87dd885ee5391e5f481e7c8bd550a7c543623) ([#9813](https://github.com/yt-dlp/yt-dlp/issues/9813)) by [src-tinkerer](https://github.com/src-tinkerer)
+
+#### Networking changes
+- [Add `extensions` attribute to `Response`](https://github.com/yt-dlp/yt-dlp/commit/bec9a59e8ec82c18e3bf9268eaa436793dd52e35) ([#9756](https://github.com/yt-dlp/yt-dlp/issues/9756)) by [bashonly](https://github.com/bashonly)
+- **Request Handler**
+ - requests
+ - [Patch support for `requests` 2.32.2+](https://github.com/yt-dlp/yt-dlp/commit/3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0) ([#9992](https://github.com/yt-dlp/yt-dlp/issues/9992)) by [Grub4K](https://github.com/Grub4K)
+ - [Update to `requests` 2.32.0](https://github.com/yt-dlp/yt-dlp/commit/c36513f1be2ef3d3cec864accbffda1afaa06ffd) ([#9980](https://github.com/yt-dlp/yt-dlp/issues/9980)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- [Add `hatch`, `ruff`, `pre-commit` and improve dev docs](https://github.com/yt-dlp/yt-dlp/commit/e897bd8292a41999cf51dba91b390db5643c72db) ([#7409](https://github.com/yt-dlp/yt-dlp/issues/7409)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
+- **build**
+ - [Migrate `linux_exe` to static musl builds](https://github.com/yt-dlp/yt-dlp/commit/ac817bc83efd939dca3e40c4b527d0ccfc77172b) ([#9811](https://github.com/yt-dlp/yt-dlp/issues/9811)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+ - [Normalize `curl_cffi` group to `curl-cffi`](https://github.com/yt-dlp/yt-dlp/commit/02483bea1c4dbe1bace8ca4d19700104fbb8a00f) ([#9698](https://github.com/yt-dlp/yt-dlp/issues/9698)) by [bashonly](https://github.com/bashonly) (With fixes in [89f535e](https://github.com/yt-dlp/yt-dlp/commit/89f535e2656964b4061c25a7739d4d6ba0a30568))
+ - [Run `macos_legacy` job on `macos-12`](https://github.com/yt-dlp/yt-dlp/commit/1a366403d9c26b992faa77e00f4d02ead57559e3) ([#9804](https://github.com/yt-dlp/yt-dlp/issues/9804)) by [bashonly](https://github.com/bashonly)
+ - [`macos` job requires `setuptools<70`](https://github.com/yt-dlp/yt-dlp/commit/78c57cc0e0998b8ed90e4306f410aa4be4115cd7) ([#9993](https://github.com/yt-dlp/yt-dlp/issues/9993)) by [bashonly](https://github.com/bashonly)
+- **cleanup**
+ - [Remove questionable extractors](https://github.com/yt-dlp/yt-dlp/commit/01395a34345d1c6ba1b73ca92f94dd200dc45341) ([#9911](https://github.com/yt-dlp/yt-dlp/issues/9911)) by [seproDev](https://github.com/seproDev)
+ - Miscellaneous: [5c019f6](https://github.com/yt-dlp/yt-dlp/commit/5c019f6328ad40d66561eac3c4de0b3cd070d0f6), [ae2af11](https://github.com/yt-dlp/yt-dlp/commit/ae2af1104f80caf2f47544763a33db2c17a3e1de) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
+- **test**
+ - [Add HTTP proxy tests](https://github.com/yt-dlp/yt-dlp/commit/3c7a287e281d9f9a353dce8902ff78a84c24a040) ([#9578](https://github.com/yt-dlp/yt-dlp/issues/9578)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Fix connect timeout test](https://github.com/yt-dlp/yt-dlp/commit/53b4d44f55cca66ac33dab092ef2a30b1164b684) ([#9906](https://github.com/yt-dlp/yt-dlp/issues/9906)) by [coletdjnz](https://github.com/coletdjnz)
+
+### 2024.04.09
+
+#### Important changes
+- Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)
+ - The shell escape function now properly escapes `%`, `\` and `\n`.
+ - `utils.Popen` has been patched accordingly.
+
+#### Core changes
+- [Add new option `--progress-delta`](https://github.com/yt-dlp/yt-dlp/commit/9590cc6b4768e190183d7d071a6c78170889116a) ([#9082](https://github.com/yt-dlp/yt-dlp/issues/9082)) by [Grub4K](https://github.com/Grub4K)
+- [Add new options `--impersonate` and `--list-impersonate-targets`](https://github.com/yt-dlp/yt-dlp/commit/0b81d4d252bd065ccd352722987ea34fe17f9244) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- [Add option `--no-break-on-existing`](https://github.com/yt-dlp/yt-dlp/commit/16be117729150b2784f3b17755c886cb0cf73374) ([#9610](https://github.com/yt-dlp/yt-dlp/issues/9610)) by [bashonly](https://github.com/bashonly)
+- [Fix `filesize_approx` calculation](https://github.com/yt-dlp/yt-dlp/commit/86e3b82261e8ebc6c6707c09544c9dfb8907c0fd) ([#9560](https://github.com/yt-dlp/yt-dlp/issues/9560)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- [Infer `acodec` for single-codec containers](https://github.com/yt-dlp/yt-dlp/commit/86a972033e05fea80e5fe7f2aff6723dbe2f3952) by [pukkandan](https://github.com/pukkandan)
+- [Prevent RCE when using `--exec` with `%q` (CVE-2024-22423)](https://github.com/yt-dlp/yt-dlp/commit/ff07792676f404ffff6ee61b5638c9dc1a33a37a) by [Grub4K](https://github.com/Grub4K)
+- **cookies**: [Add `--cookies-from-browser` support for Firefox Flatpak](https://github.com/yt-dlp/yt-dlp/commit/2ab2651a4a7be18939e2b4cb21be79fe477c797a) ([#9619](https://github.com/yt-dlp/yt-dlp/issues/9619)) by [un-def](https://github.com/un-def)
+- **utils**
+ - `traverse_obj`
+ - [Allow unbranching using `all` and `any`](https://github.com/yt-dlp/yt-dlp/commit/3699eeb67cad333272b14a42dd3843d93fda1a2e) ([#9571](https://github.com/yt-dlp/yt-dlp/issues/9571)) by [Grub4K](https://github.com/Grub4K)
+ - [Convenience improvements](https://github.com/yt-dlp/yt-dlp/commit/32abfb00bdbd119ca675fdc6d1719331f0a2741a) ([#9577](https://github.com/yt-dlp/yt-dlp/issues/9577)) by [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- [Add extractor impersonate API](https://github.com/yt-dlp/yt-dlp/commit/50c29352312f5662acf9a64b0012766f5c40af61) ([#9474](https://github.com/yt-dlp/yt-dlp/issues/9474)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- **afreecatv**
+ - [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/9415f1a5ef88482ebafe3083e8bcb778ac512df7) ([#9566](https://github.com/yt-dlp/yt-dlp/issues/9566)) by [bashonly](https://github.com/bashonly), [Tomoka1](https://github.com/Tomoka1)
+ - live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9073ae6458f4c6a832aa832c67174c61852869be) ([#9348](https://github.com/yt-dlp/yt-dlp/issues/9348)) by [hui1601](https://github.com/hui1601)
+- **asobistage**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0284f1fee202302a78888420f933deae19d9f4e1) ([#8735](https://github.com/yt-dlp/yt-dlp/issues/8735)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **box**: [Support URLs without file IDs](https://github.com/yt-dlp/yt-dlp/commit/07f5b2f7570fd9ac85aed17f4c0118f6eac77beb) ([#9504](https://github.com/yt-dlp/yt-dlp/issues/9504)) by [shreyasminocha](https://github.com/shreyasminocha)
+- **cbc.ca**: player: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/b49d5ffc53a72d8245ba319ff07bdc5b8c6a4f0c) ([#9561](https://github.com/yt-dlp/yt-dlp/issues/9561)) by [trainman261](https://github.com/trainman261)
+- **crunchyroll**
+ - [Extract `vo_adaptive_hls` formats by default](https://github.com/yt-dlp/yt-dlp/commit/be77923ffe842f667971019460f6005f3cad01eb) ([#9447](https://github.com/yt-dlp/yt-dlp/issues/9447)) by [bashonly](https://github.com/bashonly)
+ - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/954e57e405f79188450eb30103a9308732cd318f) ([#9615](https://github.com/yt-dlp/yt-dlp/issues/9615)) by [bytedream](https://github.com/bytedream)
+- **dropbox**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/a48cc86d6f6b20427553620c2ddb990ede6a4b41) ([#9627](https://github.com/yt-dlp/yt-dlp/issues/9627)) by [bashonly](https://github.com/bashonly)
+- **fathom**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/bc2b8c0596fd6b75af24822c4f0f1da6783d71f7) ([#9495](https://github.com/yt-dlp/yt-dlp/issues/9495)) by [src-tinkerer](https://github.com/src-tinkerer)
+- **gofile**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0da66980d3193cad3dae0120cddddbfcabddf7a1) ([#9446](https://github.com/yt-dlp/yt-dlp/issues/9446)) by [jazz1611](https://github.com/jazz1611)
+- **imgur**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/86d2f4d24849af0d1f3af7c0e2ac43bf8a058f74) ([#9471](https://github.com/yt-dlp/yt-dlp/issues/9471)) by [trwstin](https://github.com/trwstin)
+- **jiosaavn**
+ - [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/0ae16ceb1846cc4e609b70ce7c5d8e7458efceb2) ([#9612](https://github.com/yt-dlp/yt-dlp/issues/9612)) by [bashonly](https://github.com/bashonly)
+ - [Fix format extensions](https://github.com/yt-dlp/yt-dlp/commit/443e206ec41e64ca2aef61d8ef91640fb69b3113) ([#9609](https://github.com/yt-dlp/yt-dlp/issues/9609)) by [bashonly](https://github.com/bashonly)
+ - [Support playlists](https://github.com/yt-dlp/yt-dlp/commit/2e94602f241f6e41bdc48576c61089435529339b) ([#9622](https://github.com/yt-dlp/yt-dlp/issues/9622)) by [bashonly](https://github.com/bashonly)
+- **joqrag**: [Fix live status detection](https://github.com/yt-dlp/yt-dlp/commit/f2fd449b46c4058222e1744f7a35caa20b2d003d) ([#9624](https://github.com/yt-dlp/yt-dlp/issues/9624)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **kick**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/c8a61a910096c77ce08dad5e1b2fbda5eb964156) ([#9611](https://github.com/yt-dlp/yt-dlp/issues/9611)) by [bashonly](https://github.com/bashonly)
+- **loom**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/f859ed3ba1e8b129ae6a467592c65687e73fbca1) ([#8686](https://github.com/yt-dlp/yt-dlp/issues/8686)) by [bashonly](https://github.com/bashonly), [hruzgar](https://github.com/hruzgar)
+- **medici**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4cd9e251b9abada107b10830de997bf4d79ca369) ([#9518](https://github.com/yt-dlp/yt-dlp/issues/9518)) by [Offert4324](https://github.com/Offert4324)
+- **mixch**
+ - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4c3b7a0769706f7f0ea24adf1f219d5ae82d2b07) ([#9608](https://github.com/yt-dlp/yt-dlp/issues/9608)) by [bashonly](https://github.com/bashonly), [nipotan](https://github.com/nipotan)
+ - archive: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c59de48e2bb4c681b03b93b584a05f52609ce4a0) ([#8761](https://github.com/yt-dlp/yt-dlp/issues/8761)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **nhk**: [Fix NHK World extractors](https://github.com/yt-dlp/yt-dlp/commit/4af9d5c2f6aa81403ae2a8a5ae3cc824730f0b86) ([#9623](https://github.com/yt-dlp/yt-dlp/issues/9623)) by [bashonly](https://github.com/bashonly)
+- **patreon**: [Do not extract dead embed URLs](https://github.com/yt-dlp/yt-dlp/commit/36b240f9a72af57eb2c9d927ebb7fd1c917ebf18) ([#9613](https://github.com/yt-dlp/yt-dlp/issues/9613)) by [johnvictorfs](https://github.com/johnvictorfs)
+- **radio1be**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36baaa10e06715ccba06b78885b2042c4844c826) ([#9122](https://github.com/yt-dlp/yt-dlp/issues/9122)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **sharepoint**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ff349ff94aae0b2b148bd3670f7c91d39c2f1d8e) ([#6531](https://github.com/yt-dlp/yt-dlp/issues/6531)) by [bashonly](https://github.com/bashonly), [C0D3D3V](https://github.com/C0D3D3V)
+- **sonylivseries**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/f2868b26e917354203f82a370ad2396646edb813) ([#9423](https://github.com/yt-dlp/yt-dlp/issues/9423)) by [bashonly](https://github.com/bashonly)
+- **soundcloud**
+ - [Adjust format sorting](https://github.com/yt-dlp/yt-dlp/commit/a2d0840739cddd585d24e0ce4796394fc8a4fa2e) ([#9584](https://github.com/yt-dlp/yt-dlp/issues/9584)) by [bashonly](https://github.com/bashonly)
+ - [Support cookies](https://github.com/yt-dlp/yt-dlp/commit/97362712a1f2b04e735bdf54f749ad99165a62fe) ([#9586](https://github.com/yt-dlp/yt-dlp/issues/9586)) by [bashonly](https://github.com/bashonly)
+ - [Support retries for API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/246571ae1d867df8bf31a056bdf3bbbfd398366a) ([#9585](https://github.com/yt-dlp/yt-dlp/issues/9585)) by [bashonly](https://github.com/bashonly)
+- **thisoldhouse**: [Support Brightcove embeds](https://github.com/yt-dlp/yt-dlp/commit/0df63cce69026d2f4c0cbb4dd36163e83eac93dc) ([#9576](https://github.com/yt-dlp/yt-dlp/issues/9576)) by [bashonly](https://github.com/bashonly)
+- **tiktok**
+ - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/cb61e20c266facabb7a30f9ce53bd79dfc158475) ([#9548](https://github.com/yt-dlp/yt-dlp/issues/9548)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+ - [Prefer non-bytevc2 formats](https://github.com/yt-dlp/yt-dlp/commit/63f685f341f35f6f02b0368d1ba53bdb5b520410) ([#9575](https://github.com/yt-dlp/yt-dlp/issues/9575)) by [bashonly](https://github.com/bashonly)
+ - [Restore `carrier_region` API parameter](https://github.com/yt-dlp/yt-dlp/commit/fc53ec13ff1ee926a3e533a68cfca8acc887b661) ([#9637](https://github.com/yt-dlp/yt-dlp/issues/9637)) by [bashonly](https://github.com/bashonly)
+ - [Update API hostname](https://github.com/yt-dlp/yt-dlp/commit/8c05b3ebae23c5b444857549a85b84004c01a536) ([#9444](https://github.com/yt-dlp/yt-dlp/issues/9444)) by [bashonly](https://github.com/bashonly)
+- **twitch**: [Extract AV1 and HEVC formats](https://github.com/yt-dlp/yt-dlp/commit/02f93ff51b3ff9436d60c4993562b366eaae8851) ([#9158](https://github.com/yt-dlp/yt-dlp/issues/9158)) by [kasper93](https://github.com/kasper93)
+- **vkplay**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/b15b0c1d2106437ec61a5c436c543e8760eac160) ([#9636](https://github.com/yt-dlp/yt-dlp/issues/9636)) by [bashonly](https://github.com/bashonly)
+- **xvideos**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/aa7e9ae4f48276bd5d0173966c77db9484f65a0a) ([#9502](https://github.com/yt-dlp/yt-dlp/issues/9502)) by [sta1us](https://github.com/sta1us)
+- **youtube**
+ - [Calculate more accurate `filesize`](https://github.com/yt-dlp/yt-dlp/commit/a25a424323267e3f6f9f63c0b62df499bd7b8d46) by [pukkandan](https://github.com/pukkandan)
+ - [Update `android` params](https://github.com/yt-dlp/yt-dlp/commit/e7b17fce14775bd2448695c8eb7379b8d31d3537) by [pukkandan](https://github.com/pukkandan)
+ - search: [Fix params for uncensored results](https://github.com/yt-dlp/yt-dlp/commit/17d248a58781e2588d18a5ebe00c441d10011fcd) ([#9456](https://github.com/yt-dlp/yt-dlp/issues/9456)) by [alb](https://github.com/alb), [pukkandan](https://github.com/pukkandan)
+
+#### Downloader changes
+- **ffmpeg**: [Accept output args from info dict](https://github.com/yt-dlp/yt-dlp/commit/9c42b7eef547e826e9fcc7beb6706a2523949d05) ([#9278](https://github.com/yt-dlp/yt-dlp/issues/9278)) by [bashonly](https://github.com/bashonly)
+
+#### Networking changes
+- [Respect `SSLKEYLOGFILE` environment variable](https://github.com/yt-dlp/yt-dlp/commit/79a451e5763eda8b10d00684d5d3378f3255ee01) ([#9543](https://github.com/yt-dlp/yt-dlp/issues/9543)) by [luiso1979](https://github.com/luiso1979)
+- **Request Handler**
+ - curlcffi: [Add support for `curl_cffi`](https://github.com/yt-dlp/yt-dlp/commit/52f5be1f1e0dc45bb397ab950f564721976a39bf) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+ - websockets: [Workaround race condition causing issues on PyPy](https://github.com/yt-dlp/yt-dlp/commit/e5d4f11104ce7ea1717a90eea82c0f7d230ea5d5) ([#9514](https://github.com/yt-dlp/yt-dlp/issues/9514)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- **build**
+ - [Do not include `curl_cffi` in `macos_legacy`](https://github.com/yt-dlp/yt-dlp/commit/b19ae095fdddd43c2a2c67d10fbe0d9a645bb98f) ([#9653](https://github.com/yt-dlp/yt-dlp/issues/9653)) by [bashonly](https://github.com/bashonly)
+ - [Optional dependencies cleanup](https://github.com/yt-dlp/yt-dlp/commit/58dd0f8d1eee6bc9fdc57f1923bed772fa3c946d) ([#9550](https://github.com/yt-dlp/yt-dlp/issues/9550)) by [bashonly](https://github.com/bashonly)
+ - [Print SHA sums to GHA logs](https://github.com/yt-dlp/yt-dlp/commit/e8032503b9517465b0e86d776fc1e60d8795d673) ([#9582](https://github.com/yt-dlp/yt-dlp/issues/9582)) by [bashonly](https://github.com/bashonly)
+ - [Update changelog for tarball and sdist](https://github.com/yt-dlp/yt-dlp/commit/17b96974a334688f76b57d350e07cae8cda46877) ([#9425](https://github.com/yt-dlp/yt-dlp/issues/9425)) by [bashonly](https://github.com/bashonly)
+- **cleanup**
+ - [Standardize `import datetime as dt`](https://github.com/yt-dlp/yt-dlp/commit/c305a25c1b16bcf7a5ec499c3b786ed1e2c748da) ([#8978](https://github.com/yt-dlp/yt-dlp/issues/8978)) by [pukkandan](https://github.com/pukkandan)
+ - ie: [No `from` stdlib imports in extractors](https://github.com/yt-dlp/yt-dlp/commit/e3a3ed8a981d9395c4859b6ef56cd02bc3148db2) by [pukkandan](https://github.com/pukkandan)
+ - Miscellaneous: [216f6a3](https://github.com/yt-dlp/yt-dlp/commit/216f6a3cb57824e6a3c859649ce058c199b1b247) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+- **docs**
+ - [Update yt-dlp tagline](https://github.com/yt-dlp/yt-dlp/commit/388c979ac63a8774339fac2516fe1cc852b4276e) ([#9481](https://github.com/yt-dlp/yt-dlp/issues/9481)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+ - [Various manpage fixes](https://github.com/yt-dlp/yt-dlp/commit/df0e138fc02ae2764a44f2f59fc93c756c4d3ee2) by [leoheitmannruiz](https://github.com/leoheitmannruiz)
+- **test**
+ - [Workaround websocket server hanging](https://github.com/yt-dlp/yt-dlp/commit/f849d77ab54788446b995d256e1ee0894c4fb927) ([#9467](https://github.com/yt-dlp/yt-dlp/issues/9467)) by [coletdjnz](https://github.com/coletdjnz)
+ - `traversal`: [Separate traversal tests](https://github.com/yt-dlp/yt-dlp/commit/979ce2e786f2ee3fc783b6dc1ef4188d8805c923) ([#9574](https://github.com/yt-dlp/yt-dlp/issues/9574)) by [Grub4K](https://github.com/Grub4K)
+
### 2024.03.10
#### Core changes
diff --git a/Makefile b/Makefile
index 9344003..e1de7f3 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ all: lazy-extractors yt-dlp doc pypi-files
clean: clean-test clean-dist
clean-all: clean clean-cache
completions: completion-bash completion-fish completion-zsh
-doc: README.md CONTRIBUTING.md issuetemplates supportedsites
+doc: README.md CONTRIBUTING.md CONTRIBUTORS issuetemplates supportedsites
ot: offlinetest
tar: yt-dlp.tar.gz
@@ -10,9 +10,12 @@ tar: yt-dlp.tar.gz
# intended use: when building a source distribution,
# make pypi-files && python3 -m build -sn .
pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
- completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*
+ completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*
-.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
+.PHONY: all clean clean-all clean-test clean-dist clean-cache \
+ completions completion-bash completion-fish completion-zsh \
+ doc issuetemplates supportedsites ot offlinetest codetest test \
+ tar pypi-files lazy-extractors install uninstall
clean-test:
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
@@ -24,7 +27,7 @@ clean-dist:
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
clean-cache:
find . \( \
- -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
+ -type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
\) -prune -exec rm -rf {} \;
completion-bash: completions/bash/yt-dlp
@@ -67,14 +70,15 @@ uninstall:
rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish
codetest:
- flake8 .
+ ruff check .
+ autopep8 --diff .
test:
- $(PYTHON) -m pytest
+ $(PYTHON) -m pytest -Werror
$(MAKE) codetest
offlinetest: codetest
- $(PYTHON) -m pytest -k "not download"
+ $(PYTHON) -m pytest -Werror -m "not download"
CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
@@ -148,7 +152,7 @@ yt-dlp.tar.gz: all
--exclude '*.pyo' \
--exclude '*~' \
--exclude '__pycache__' \
- --exclude '.pytest_cache' \
+ --exclude '.*_cache' \
--exclude '.git' \
-- \
README.md supportedsites.md Changelog.md LICENSE \
@@ -156,5 +160,14 @@ yt-dlp.tar.gz: all
Makefile yt-dlp.1 README.txt completions .gitignore \
setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test
-AUTHORS:
- git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS
+AUTHORS: Changelog.md
+ @if [ -d '.git' ] && command -v git > /dev/null ; then \
+ echo 'Generating $@ from git commit history' ; \
+ git shortlog -s -n HEAD | cut -f2 | sort > $@ ; \
+ fi
+
+CONTRIBUTORS: Changelog.md
+ @if [ -d '.git' ] && command -v git > /dev/null ; then \
+ echo 'Updating $@ from git commit history' ; \
+ $(PYTHON) devscripts/make_changelog.py -v -c > /dev/null ; \
+ fi
diff --git a/README.md b/README.md
index 1e108a2..e8cd6d3 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@
</div>
<!-- MANPAGE: END EXCLUDED SECTION -->
-yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
+yt-dlp is a feature-rich command-line audio/video downloader with support for [thousands of sites](supportedsites.md). The project is a fork of [youtube-dl](https://github.com/ytdl-org/youtube-dl) based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc).
<!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE -->
@@ -108,7 +108,6 @@ File|Description
[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary
[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows))
[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
-[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update)
[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary
[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary
[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
@@ -158,6 +157,7 @@ When using `--update`/`-U`, a release binary will only update to its current cha
You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to though, there is no verification done for binaries from different repositories.
Example usage:
+
* `yt-dlp --update-to master` switch to the `master` channel and update to its latest release
* `yt-dlp --update-to stable@2023.07.06` upgrade/downgrade to the release tagged `2023.07.06` on the `stable` channel
* `yt-dlp --update-to 2023.10.07` upgrade/downgrade to tag `2023.10.07` if it exists on the current channel
@@ -169,7 +169,7 @@ Example usage:
yt-dlp --update-to nightly
# To install nightly with pip:
-python3 -m pip install -U --pre yt-dlp[default]
+python3 -m pip install -U --pre "yt-dlp[default]"
```
## DEPENDENCIES
@@ -196,6 +196,15 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
* [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)
+#### Impersonation
+
+The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
+
+* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
+ * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
+ * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
+
+
### Metadata
* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
@@ -253,7 +262,7 @@ You can also run `make yt-dlp` instead to compile only the binary without updati
### Standalone Py2Exe Builds (Windows)
-While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run.
+While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
@@ -389,6 +398,13 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
--source-address IP Client-side IP address to bind to
+ --impersonate CLIENT[:OS] Client to impersonate for requests. E.g.
+ chrome, chrome-110, chrome:windows-10. Pass
+ --impersonate="" to impersonate any client.
+ Note that forcing impersonation for all
+ requests may have a detrimental impact on
+ download speed and stability
+ --list-impersonate-targets List available clients to impersonate.
-4, --force-ipv4 Make all connections via IPv4
-6, --force-ipv6 Make all connections via IPv6
--enable-file-urls Enable file:// URLs. This is disabled by
@@ -468,6 +484,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--max-downloads NUMBER Abort after downloading NUMBER files
--break-on-existing Stop the download process when encountering
a file that is in the archive
+ --no-break-on-existing Do not stop the download process when
+ encountering a file that is in the archive
+ (default)
--break-per-input Alters --max-downloads, --break-on-existing,
--break-match-filter, and autonumber to
reset per input URL
@@ -649,16 +668,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
The name of the browser to load cookies
from. Currently supported browsers are:
brave, chrome, chromium, edge, firefox,
- opera, safari, vivaldi. Optionally, the
- KEYRING used for decrypting Chromium cookies
- on Linux, the name/path of the PROFILE to
- load cookies from, and the CONTAINER name
- (if Firefox) ("none" for no container) can
- be given with their respective seperators.
- By default, all containers of the most
- recently accessed profile are used.
- Currently supported keyrings are: basictext,
- gnomekeyring, kwallet, kwallet5, kwallet6
+ opera, safari, vivaldi, whale. Optionally,
+ the KEYRING used for decrypting Chromium
+ cookies on Linux, the name/path of the
+ PROFILE to load cookies from, and the
+ CONTAINER name (if Firefox) ("none" for no
+ container) can be given with their
+ respective separators. By default, all
+ containers of the most recently accessed
+ profile are used. Currently supported
+ keyrings are: basictext, gnomekeyring,
+ kwallet, kwallet5, kwallet6
--no-cookies-from-browser Do not load cookies from browser (default)
--cache-dir DIR Location in the filesystem where yt-dlp can
store some downloaded information (such as
@@ -741,6 +761,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
accessible under "progress" key. E.g.
--console-title --progress-template
"download-title:%(info.id)s-%(progress.eta)s"
+ --progress-delta SECONDS Time between progress output (default: 0)
-v, --verbose Print various debugging information
--dump-pages Print downloaded pages encoded using base64
to debug problems (very verbose)
@@ -1459,9 +1480,9 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
- `width`: Width of the video, if known
- `height`: Height of the video, if known
- `aspect_ratio`: Aspect ratio of the video, if known
- - `tbr`: Average bitrate of audio and video in KBit/s
- - `abr`: Average audio bitrate in KBit/s
- - `vbr`: Average video bitrate in KBit/s
+ - `tbr`: Average bitrate of audio and video in [kbps](## "1000 bits/sec")
+ - `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
+ - `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate
- `audio_channels`: The number of audio channels
@@ -1486,7 +1507,7 @@ Any string comparison may be prefixed with negation `!` in order to produce an o
**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
-Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
@@ -1518,10 +1539,10 @@ The available fields are:
- `fps`: Framerate of video
- `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
- `channels`: The number of audio channels
- - `tbr`: Total average bitrate in KBit/s
- - `vbr`: Average video bitrate in KBit/s
- - `abr`: Average audio bitrate in KBit/s
- - `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
+ - `tbr`: Total average bitrate in [kbps](## "1000 bits/sec")
+ - `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
+ - `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
+ - `br`: Average bitrate in [kbps](## "1000 bits/sec"), `tbr`/`vbr`/`abr`
- `asr`: Audio sample rate in Hz
**Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.
@@ -1733,7 +1754,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
# EXTRACTOR ARGUMENTS
-Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"`
+Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;formats=incomplete" --extractor-args "funimation:version=uncut"`
Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
@@ -1742,7 +1763,7 @@ The following extractors use this feature:
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@@ -1768,8 +1789,7 @@ The following extractors use this feature:
* `version`: The video version to extract - `uncut` or `simulcast`
#### crunchyrollbeta (Crunchyroll)
-* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
-* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
+* `hardsub`: One or more hardsub versions to extract (in order of preference), or `all` (default: `None` = no hardsubs will be extracted), e.g. `crunchyrollbeta:hardsub=en-US,de-DE`
#### vikichannel
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
@@ -1792,9 +1812,13 @@ The following extractors use this feature:
* `max_comments`: Maximum number of comments to extract - default is `120`
#### tiktok
-* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
-* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
-* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`
+* `api_hostname`: Hostname to use for mobile API calls, e.g. `api22-normal-c-alisg.tiktokv.com`
+* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
+* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
+* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
+* `aid`: Default app ID to use with mobile API calls, e.g. `1180`
+* `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
+* `device_id`: Enable mobile API extraction with a genuine device ID to be used with mobile API calls. Default is a random 19-digit string
#### rokfinchannel
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
@@ -1814,9 +1838,18 @@ The following extractors use this feature:
#### nflplusreplay
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
+#### jiocinema
+* `refresh_token`: The `refreshToken` UUID from browser local storage can be passed to extend the life of your login session when logging in with `token` as username and the `accessToken` from browser local storage as password
+
#### jiosaavn
* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
+#### afreecatvlive
+* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
+
+#### soundcloud
+* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
+
**Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
@@ -1874,6 +1907,7 @@ Plugins can be installed using various methods and locations.
`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.
+
* e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins/<type>/myplugin.py`
Run yt-dlp with `--verbose` to check if the plugin has been loaded.
@@ -2092,7 +2126,7 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
### New features
-* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
+* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@a08f2b7**](https://github.com/ytdl-org/youtube-dl/commit/a08f2b7e4567cdc50c0614ee0a4ffdff49b8b6e6) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
@@ -2302,6 +2336,7 @@ These options may no longer work as intended
--write-annotations No supported site has annotations now
--no-write-annotations Default
--compat-options seperate-video-versions No longer needed
+ --compat-options no-youtube-prefer-utc-upload-date No longer supported
#### Removed
These options were deprecated since 2014 and have now been entirely removed
diff --git a/bundle/docker/compose.yml b/bundle/docker/compose.yml
new file mode 100644
index 0000000..5f89ca6
--- /dev/null
+++ b/bundle/docker/compose.yml
@@ -0,0 +1,10 @@
+services:
+ static:
+ build: static
+ environment:
+ channel: ${channel}
+ origin: ${origin}
+ version: ${version}
+ volumes:
+ - ~/build:/build
+ - ../..:/yt-dlp
diff --git a/bundle/docker/static/Dockerfile b/bundle/docker/static/Dockerfile
new file mode 100644
index 0000000..dae2dff
--- /dev/null
+++ b/bundle/docker/static/Dockerfile
@@ -0,0 +1,21 @@
+FROM alpine:3.19 as base
+
+RUN apk --update add --no-cache \
+ build-base \
+ python3 \
+ pipx \
+ ;
+
+RUN pipx install pyinstaller
+# Requires above step to prepare the shared venv
+RUN ~/.local/share/pipx/shared/bin/python -m pip install -U wheel
+RUN apk --update add --no-cache \
+ scons \
+ patchelf \
+ binutils \
+ ;
+RUN pipx install staticx
+
+WORKDIR /yt-dlp
+COPY entrypoint.sh /entrypoint.sh
+ENTRYPOINT /entrypoint.sh
diff --git a/bundle/docker/static/entrypoint.sh b/bundle/docker/static/entrypoint.sh
new file mode 100755
index 0000000..93d84fa
--- /dev/null
+++ b/bundle/docker/static/entrypoint.sh
@@ -0,0 +1,13 @@
+#!/bin/ash
+set -e
+
+source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
+python -m devscripts.install_deps --include secretstorage
+python -m devscripts.make_lazy_extractors
+python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
+python -m bundle.pyinstaller
+deactivate
+
+source ~/.local/share/pipx/venvs/staticx/bin/activate
+staticx /yt-dlp/dist/yt-dlp_linux /build/yt-dlp_linux
+deactivate
diff --git a/bundle/py2exe.py b/bundle/py2exe.py
index ccb52ea..2811674 100755
--- a/bundle/py2exe.py
+++ b/bundle/py2exe.py
@@ -28,7 +28,7 @@ def main():
}],
version_info={
'version': VERSION,
- 'description': 'A youtube-dl fork with additional features and patches',
+ 'description': 'A feature-rich command-line audio/video downloader',
'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
'product_name': 'yt-dlp',
'product_version': VERSION,
diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json
index 2a34ad0..86e8ec2 100644
--- a/devscripts/changelog_override.json
+++ b/devscripts/changelog_override.json
@@ -126,5 +126,48 @@
"when": "4ce57d3b873c2887814cbec03d029533e82f7db5",
"short": "[ie] Support multi-period MPD streams (#6654)",
"authors": ["alard", "pukkandan"]
+ },
+ {
+ "action": "change",
+ "when": "aa7e9ae4f48276bd5d0173966c77db9484f65a0a",
+ "short": "[ie/xvideos] Support new URL format (#9502)",
+ "authors": ["sta1us"]
+ },
+ {
+ "action": "remove",
+ "when": "22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80"
+ },
+ {
+ "action": "change",
+ "when": "e3a3ed8a981d9395c4859b6ef56cd02bc3148db2",
+ "short": "[cleanup:ie] No `from` stdlib imports in extractors",
+ "authors": ["pukkandan"]
+ },
+ {
+ "action": "add",
+ "when": "9590cc6b4768e190183d7d071a6c78170889116a",
+ "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly."
+ },
+ {
+ "action": "change",
+ "when": "41ba4a808b597a3afed78c89675a30deb6844450",
+ "short": "[ie/tiktok] Extract via mobile API only if extractor-arg is passed (#9938)",
+ "authors": ["bashonly"]
+ },
+ {
+ "action": "remove",
+ "when": "6e36d17f404556f0e3a43f441c477a71a91877d9"
+ },
+ {
+ "action": "change",
+ "when": "beaf832c7a9d57833f365ce18f6115b88071b296",
+ "short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)",
+ "authors": ["bashonly", "Grub4K"]
+ },
+ {
+ "action": "change",
+ "when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6",
+ "short": "[cleanup] Misc (#9765)",
+ "authors": ["bashonly", "Grub4K", "seproDev"]
}
]
diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py
index 889d9ab..d292505 100755
--- a/devscripts/install_deps.py
+++ b/devscripts/install_deps.py
@@ -10,6 +10,8 @@ import argparse
import re
import subprocess
+from pathlib import Path
+
from devscripts.tomlparse import parse_toml
from devscripts.utils import read_file
@@ -17,44 +19,50 @@ from devscripts.utils import read_file
def parse_args():
parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
parser.add_argument(
- 'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
+ 'input', nargs='?', metavar='TOMLFILE', default=Path(__file__).parent.parent / 'pyproject.toml',
+ help='input file (default: %(default)s)')
parser.add_argument(
- '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
+ '-e', '--exclude', metavar='DEPENDENCY', action='append',
+ help='exclude a dependency')
parser.add_argument(
- '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
+ '-i', '--include', metavar='GROUP', action='append',
+ help='include an optional dependency group')
parser.add_argument(
- '-o', '--only-optional', action='store_true', help='Only install optional dependencies')
+ '-o', '--only-optional', action='store_true',
+ help='only install optional dependencies')
parser.add_argument(
- '-p', '--print', action='store_true', help='Only print a requirements.txt to stdout')
+ '-p', '--print', action='store_true',
+ help='only print requirements to stdout')
parser.add_argument(
- '-u', '--user', action='store_true', help='Install with pip as --user')
+ '-u', '--user', action='store_true',
+ help='install with pip as --user')
return parser.parse_args()
def main():
args = parse_args()
project_table = parse_toml(read_file(args.input))['project']
+ recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]')
optional_groups = project_table['optional-dependencies']
excludes = args.exclude or []
- deps = []
+ def yield_deps(group):
+ for dep in group:
+ if mobj := recursive_pattern.fullmatch(dep):
+ yield from optional_groups.get(mobj.group('group_name'), [])
+ else:
+ yield dep
+
+ targets = []
if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
- deps.extend(project_table['dependencies'])
+ targets.extend(project_table['dependencies'])
if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
- deps.extend(optional_groups['default'])
-
- def name(dependency):
- return re.match(r'[\w-]+', dependency)[0].lower()
-
- target_map = {name(dep): dep for dep in deps}
+ targets.extend(yield_deps(optional_groups['default']))
for include in filter(None, map(optional_groups.get, args.include or [])):
- target_map.update(zip(map(name, include), include))
-
- for exclude in map(name, excludes):
- target_map.pop(exclude, None)
+ targets.extend(yield_deps(include))
- targets = list(target_map.values())
+ targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes]
if args.print:
for target in targets:
diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py
index faab5fa..8e199e7 100644
--- a/devscripts/make_changelog.py
+++ b/devscripts/make_changelog.py
@@ -445,7 +445,32 @@ def get_new_contributors(contributors_path, commits):
return sorted(new_contributors, key=str.casefold)
-if __name__ == '__main__':
+def create_changelog(args):
+ logging.basicConfig(
+ datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
+ level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)
+
+ commits = CommitRange(None, args.commitish, args.default_author)
+
+ if not args.no_override:
+ if args.override_path.exists():
+ overrides = json.loads(read_file(args.override_path))
+ commits.apply_overrides(overrides)
+ else:
+ logger.warning(f'File {args.override_path.as_posix()} does not exist')
+
+ logger.info(f'Loaded {len(commits)} commits')
+
+ new_contributors = get_new_contributors(args.contributors_path, commits)
+ if new_contributors:
+ if args.contributors:
+ write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
+ logger.info(f'New contributors: {", ".join(new_contributors)}')
+
+ return Changelog(commits.groups(), args.repo, args.collapsible)
+
+
+def create_parser():
import argparse
parser = argparse.ArgumentParser(
@@ -477,27 +502,9 @@ if __name__ == '__main__':
parser.add_argument(
'--collapsible', action='store_true',
help='make changelog collapsible (default: %(default)s)')
- args = parser.parse_args()
-
- logging.basicConfig(
- datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
- level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)
-
- commits = CommitRange(None, args.commitish, args.default_author)
-
- if not args.no_override:
- if args.override_path.exists():
- overrides = json.loads(read_file(args.override_path))
- commits.apply_overrides(overrides)
- else:
- logger.warning(f'File {args.override_path.as_posix()} does not exist')
- logger.info(f'Loaded {len(commits)} commits')
+ return parser
- new_contributors = get_new_contributors(args.contributors_path, commits)
- if new_contributors:
- if args.contributors:
- write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
- logger.info(f'New contributors: {", ".join(new_contributors)}')
- print(Changelog(commits.groups(), args.repo, args.collapsible))
+if __name__ == '__main__':
+ print(create_changelog(create_parser().parse_args()))
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 9b12e71..47188e9 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -24,7 +24,7 @@ PREFIX = r'''%yt-dlp(1)
# NAME
-yt\-dlp \- A youtube-dl fork with additional features and patches
+yt\-dlp \- A feature\-rich command\-line audio/video downloader
# SYNOPSIS
@@ -43,6 +43,27 @@ def filter_excluded_sections(readme):
'', readme)
+def _convert_code_blocks(readme):
+ current_code_block = None
+
+ for line in readme.splitlines(True):
+ if current_code_block:
+ if line == current_code_block:
+ current_code_block = None
+ yield '\n'
+ else:
+ yield f' {line}'
+ elif line.startswith('```'):
+ current_code_block = line.count('`') * '`' + '\n'
+ yield '\n'
+ else:
+ yield line
+
+
+def convert_code_blocks(readme):
+ return ''.join(_convert_code_blocks(readme))
+
+
def move_sections(readme):
MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
sections = re.findall(r'(?m)^%s$' % (
@@ -65,8 +86,10 @@ def move_sections(readme):
def filter_options(readme):
section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
+ section_new = section.replace('*', R'\*')
+
options = '# OPTIONS\n'
- for line in section.split('\n')[1:]:
+ for line in section_new.split('\n')[1:]:
mobj = re.fullmatch(r'''(?x)
\s{4}(?P<opt>-(?:,\s|[^\s])+)
(?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
@@ -86,7 +109,7 @@ def filter_options(readme):
return readme.replace(section, options, 1)
-TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options)
+TRANSFORM = compose_functions(filter_excluded_sections, convert_code_blocks, move_sections, filter_options)
def main():
diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat
deleted file mode 100644
index 57b1f4b..0000000
--- a/devscripts/run_tests.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-@echo off
-
->&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead
-python %~dp0run_tests.py %~1
diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py
index 6d638a9..c605aa6 100755
--- a/devscripts/run_tests.py
+++ b/devscripts/run_tests.py
@@ -4,6 +4,7 @@ import argparse
import functools
import os
import re
+import shlex
import subprocess
import sys
from pathlib import Path
@@ -18,6 +19,8 @@ def parse_args():
'test', help='a extractor tests, or one of "core" or "download"', nargs='*')
parser.add_argument(
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
+ parser.add_argument(
+ '--pytest-args', help='arguments to passthrough to pytest')
return parser.parse_args()
@@ -26,15 +29,16 @@ def run_tests(*tests, pattern=None, ci=False):
run_download = 'download' in tests
tests = list(map(fix_test_name, tests))
- arguments = ['pytest', '-Werror', '--tb=short']
+ pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
+ arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
if ci:
arguments.append('--color=yes')
+ if pattern:
+ arguments.extend(['-k', pattern])
if run_core:
arguments.extend(['-m', 'not download'])
elif run_download:
arguments.extend(['-m', 'download'])
- elif pattern:
- arguments.extend(['-k', pattern])
else:
arguments.extend(
f'test/test_download.py::TestDownload::test_{test}' for test in tests)
@@ -46,13 +50,13 @@ def run_tests(*tests, pattern=None, ci=False):
pass
arguments = [sys.executable, '-Werror', '-m', 'unittest']
+ if pattern:
+ arguments.extend(['-k', pattern])
if run_core:
print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True)
return 1
elif run_download:
arguments.append('test.test_download')
- elif pattern:
- arguments.extend(['-k', pattern])
else:
arguments.extend(
f'test.test_download.TestDownload.test_{test}' for test in tests)
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
deleted file mode 100755
index 123ceb1..0000000
--- a/devscripts/run_tests.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/env sh
-
->&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead'
-python3 devscripts/run_tests.py "$1"
diff --git a/devscripts/tomlparse.py b/devscripts/tomlparse.py
index 85ac4ee..ac9ea31 100755
--- a/devscripts/tomlparse.py
+++ b/devscripts/tomlparse.py
@@ -11,7 +11,7 @@ IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
from __future__ import annotations
-import datetime
+import datetime as dt
import json
import re
@@ -115,9 +115,9 @@ def parse_value(data: str, index: int):
for func in [
int,
float,
- datetime.time.fromisoformat,
- datetime.date.fromisoformat,
- datetime.datetime.fromisoformat,
+ dt.time.fromisoformat,
+ dt.date.fromisoformat,
+ dt.datetime.fromisoformat,
{'true': True, 'false': False}.get,
]:
try:
@@ -179,7 +179,7 @@ def main():
data = file.read()
def default(obj):
- if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
+ if isinstance(obj, (dt.date, dt.time, dt.datetime)):
return obj.isoformat()
print(json.dumps(parse_toml(data), default=default))
diff --git a/devscripts/update-version.py b/devscripts/update-version.py
index da54a6a..07a0717 100644
--- a/devscripts/update-version.py
+++ b/devscripts/update-version.py
@@ -9,15 +9,15 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import argparse
import contextlib
+import datetime as dt
import sys
-from datetime import datetime, timezone
from devscripts.utils import read_version, run_process, write_file
def get_new_version(version, revision):
if not version:
- version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
+ version = dt.datetime.now(dt.timezone.utc).strftime('%Y.%m.%d')
if revision:
assert revision.isdecimal(), 'Revision must be a number'
diff --git a/devscripts/update_changelog.py b/devscripts/update_changelog.py
new file mode 100755
index 0000000..36b9a8e
--- /dev/null
+++ b/devscripts/update_changelog.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from pathlib import Path
+
+from devscripts.make_changelog import create_changelog, create_parser
+from devscripts.utils import read_file, read_version, write_file
+
+# Always run after devscripts/update-version.py, and run before `make doc|pypi-files|tar|all`
+
+if __name__ == '__main__':
+ parser = create_parser()
+ parser.description = 'Update an existing changelog file with an entry for a new release'
+ parser.add_argument(
+ '--changelog-path', type=Path, default=Path(__file__).parent.parent / 'Changelog.md',
+ help='path to the Changelog file')
+ args = parser.parse_args()
+ new_entry = create_changelog(args)
+
+ header, sep, changelog = read_file(args.changelog_path).partition('\n### ')
+ write_file(args.changelog_path, f'{header}{sep}{read_version()}\n{new_entry}\n{sep}{changelog}')
diff --git a/pyinst.py b/pyinst.py
deleted file mode 100755
index 4a8ed2d..0000000
--- a/pyinst.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python3
-
-# Allow execution from anywhere
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-import warnings
-
-from bundle.pyinstaller import main
-
-warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. '
- 'Use `bundle.pyinstaller` instead'))
-
-if __name__ == '__main__':
- main()
diff --git a/pyproject.toml b/pyproject.toml
index 64504ff..96cb368 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,7 @@ maintainers = [
{name = "bashonly", email = "bashonly@protonmail.com"},
{name = "coletdjnz", email = "coletdjnz@protonmail.com"},
]
-description = "A youtube-dl fork with additional features and patches"
+description = "A feature-rich command-line audio/video downloader"
readme = "README.md"
requires-python = ">=3.8"
keywords = [
@@ -53,6 +53,7 @@ dependencies = [
[project.optional-dependencies]
default = []
+curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
secretstorage = [
"cffi",
"secretstorage",
@@ -61,15 +62,29 @@ build = [
"build",
"hatchling",
"pip",
+ "setuptools>=66.1.0,<70",
"wheel",
]
dev = [
- "flake8",
- "isort",
- "pytest",
+ "pre-commit",
+ "yt-dlp[static-analysis]",
+ "yt-dlp[test]",
+]
+static-analysis = [
+ "autopep8~=2.0",
+ "ruff~=0.4.4",
+]
+test = [
+ "pytest~=8.1",
+]
+pyinstaller = [
+ "pyinstaller>=6.3; sys_platform!='darwin'",
+ "pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi
+]
+py2exe = [
+ "py2exe>=0.12",
+ "requests==2.31.*",
]
-pyinstaller = ["pyinstaller>=6.3"]
-py2exe = ["py2exe>=0.12"]
[project.urls]
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
@@ -118,3 +133,146 @@ artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
[tool.hatch.version]
path = "yt_dlp/version.py"
pattern = "_pkg_version = '(?P<version>[^']+)'"
+
+[tool.hatch.envs.default]
+features = ["curl-cffi", "default"]
+dependencies = ["pre-commit"]
+path = ".venv"
+installer = "uv"
+
+[tool.hatch.envs.default.scripts]
+setup = "pre-commit install --config .pre-commit-hatch.yaml"
+yt-dlp = "python -Werror -Xdev -m yt_dlp {args}"
+
+[tool.hatch.envs.hatch-static-analysis]
+detached = true
+features = ["static-analysis"]
+dependencies = [] # override hatch ruff version
+config-path = "pyproject.toml"
+
+[tool.hatch.envs.hatch-static-analysis.scripts]
+format-check = "autopep8 --diff {args:.}"
+format-fix = "autopep8 --in-place {args:.}"
+lint-check = "ruff check {args:.}"
+lint-fix = "ruff check --fix {args:.}"
+
+[tool.hatch.envs.hatch-test]
+features = ["test"]
+dependencies = [
+ "pytest-randomly~=3.15",
+ "pytest-rerunfailures~=14.0",
+ "pytest-xdist[psutil]~=3.5",
+]
+
+[tool.hatch.envs.hatch-test.scripts]
+run = "python -m devscripts.run_tests {args}"
+run-cov = "echo Code coverage not implemented && exit 1"
+
+[[tool.hatch.envs.hatch-test.matrix]]
+python = [
+ "3.8",
+ "3.9",
+ "3.10",
+ "3.11",
+ "3.12",
+ "pypy3.8",
+ "pypy3.9",
+ "pypy3.10",
+]
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+ignore = [
+ "E402", # module level import not at top of file
+ "E501", # line too long
+ "E731", # do not assign a lambda expression, use a def
+ "E741", # ambiguous variable name
+]
+select = [
+ "E", # pycodestyle errors
+ "W", # pycodestyle warnings
+ "F", # pyflakes
+ "I", # import order
+]
+
+[tool.ruff.lint.per-file-ignores]
+"devscripts/lazy_load_template.py" = ["F401"]
+"!yt_dlp/extractor/**.py" = ["I"]
+
+[tool.ruff.lint.isort]
+known-first-party = [
+ "bundle",
+ "devscripts",
+ "test",
+]
+relative-imports-order = "closest-to-furthest"
+
+[tool.autopep8]
+max_line_length = 120
+recursive = true
+exit-code = true
+jobs = 0
+select = [
+ "E101",
+ "E112",
+ "E113",
+ "E115",
+ "E116",
+ "E117",
+ "E121",
+ "E122",
+ "E123",
+ "E124",
+ "E125",
+ "E126",
+ "E127",
+ "E128",
+ "E129",
+ "E131",
+ "E201",
+ "E202",
+ "E203",
+ "E211",
+ "E221",
+ "E222",
+ "E223",
+ "E224",
+ "E225",
+ "E226",
+ "E227",
+ "E228",
+ "E231",
+ "E241",
+ "E242",
+ "E251",
+ "E252",
+ "E261",
+ "E262",
+ "E265",
+ "E266",
+ "E271",
+ "E272",
+ "E273",
+ "E274",
+ "E275",
+ "E301",
+ "E302",
+ "E303",
+ "E304",
+ "E305",
+ "E306",
+ "E502",
+ "E701",
+ "E702",
+ "E704",
+ "W391",
+ "W504",
+]
+
+[tool.pytest.ini_options]
+addopts = "-ra -v --strict-markers"
+markers = [
+ "download",
+]
diff --git a/setup.cfg b/setup.cfg
index aeb4cee..340cc3b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -14,12 +14,6 @@ remove-duplicate-keys = true
remove-unused-variables = true
-[tool:pytest]
-addopts = -ra -v --strict-markers
-markers =
- download
-
-
[tox:tox]
skipsdist = true
envlist = py{38,39,310,311,312},pypy{38,39,310}
diff --git a/setup.py b/setup.py
deleted file mode 100755
index 8d1e6d1..0000000
--- a/setup.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-
-# Allow execution from anywhere
-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-import warnings
-
-
-if sys.argv[1:2] == ['py2exe']:
- warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. '
- 'Use `bundle.py2exe` instead'))
-
- import bundle.py2exe
-
- bundle.py2exe.main()
-
-elif 'build_lazy_extractors' in sys.argv:
- warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. '
- 'Use `devscripts.make_lazy_extractors` instead'))
-
- import subprocess
-
- os.chdir(sys.path[0])
- print('running build_lazy_extractors')
- subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
-
-else:
-
- print(
- 'ERROR: Building by calling `setup.py` is deprecated. '
- 'Use a build frontend like `build` instead. ',
- 'Refer to https://build.pypa.io for more info', file=sys.stderr)
- sys.exit(1)
diff --git a/supportedsites.md b/supportedsites.md
index a4b2d57..3873956 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -14,7 +14,6 @@
- **6play**
- **7plus**
- **8tracks**
- - **91porn**
- **9c9media**
- **9gag**: 9GAG
- **9News**
@@ -47,7 +46,7 @@
- **aenetworks:show**
- **AeonCo**
- **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
- - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com
+ - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
- **afreecatv:user**
- **AirTV**
- **AitubeKZVideo**
@@ -105,6 +104,7 @@
- **ArteTVPlaylist**
- **asobichannel**: ASOBI CHANNEL
- **asobichannel:tag**: ASOBI CHANNEL
+ - **AsobiStage**: ASOBISTAGE (アソビステージ)
- **AtresPlayer**: [*atresplayer*](## "netrc machine")
- **AtScaleConfEvent**
- **ATVAt**
@@ -219,7 +219,7 @@
- **BusinessInsider**
- **BuzzFeed**
- **BYUtv**: (**Currently broken**)
- - **CableAV**
+ - **CaffeineTV**
- **Callin**
- **Caltrans**
- **CAM4**
@@ -332,6 +332,8 @@
- **DailyWirePodcast**
- **damtomo:record**
- **damtomo:video**
+ - **dangalplay**: [*dangalplay*](## "netrc machine")
+ - **dangalplay:season**: [*dangalplay*](## "netrc machine")
- **daum.net**
- **daum.net:clip**
- **daum.net:playlist**
@@ -395,7 +397,6 @@
- **EinsUndEinsTV**: [*1und1tv*](## "netrc machine")
- **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine")
- **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine")
- - **Einthusan**
- **eitb.tv**
- **ElementorEmbed**
- **Elonet**
@@ -436,6 +437,7 @@
- **FacebookPluginsVideo**
- **fancode:live**: [*fancode*](## "netrc machine") (**Currently broken**)
- **fancode:vod**: [*fancode*](## "netrc machine") (**Currently broken**)
+ - **Fathom**
- **faz.net**
- **fc2**: [*fc2*](## "netrc machine")
- **fc2:embed**
@@ -496,6 +498,7 @@
- **GameStar**
- **Gaskrank**
- **Gazeta**: (**Currently broken**)
+ - **GBNews**: GB News clips, features and live streams
- **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**)
- **GediDigital**
- **gem.cbc.ca**: [*cbcgem*](## "netrc machine")
@@ -525,6 +528,7 @@
- **GMANetworkVideo**
- **Go**
- **GoDiscovery**
+ - **GodResource**
- **GodTube**: (**Currently broken**)
- **Gofile**
- **Golem**
@@ -628,13 +632,14 @@
- **iwara:user**: [*iwara*](## "netrc machine")
- **Ixigua**
- **Izlesene**
- - **Jable**
- - **JablePlaylist**
- **Jamendo**
- **JamendoAlbum**
- **JeuxVideo**: (**Currently broken**)
- - **JioSaavnAlbum**
- - **JioSaavnSong**
+ - **jiocinema**: [*jiocinema*](## "netrc machine")
+ - **jiocinema:series**: [*jiocinema*](## "netrc machine")
+ - **jiosaavn:album**
+ - **jiosaavn:playlist**
+ - **jiosaavn:song**
- **Joj**
- **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
- **Jove**
@@ -716,6 +721,8 @@
- **Lnk**
- **LnkGo**
- **loc**: Library of Congress
+ - **loom**
+ - **loom:folder**
- **LoveHomePorn**
- **LRTStream**
- **LRTVOD**
@@ -969,6 +976,7 @@
- **NRKTVSeason**
- **NRKTVSeries**
- **NRLTV**: (**Currently broken**)
+ - **nts.live**
- **ntv.ru**
- **NubilesPorn**: [*nubiles-porn*](## "netrc machine")
- **nuum:live**
@@ -1010,7 +1018,6 @@
- **orf:on**
- **orf:podcast**
- **orf:radio**
- - **orf:tvthek**: ORF TVthek
- **OsnatelTV**: [*osnateltv*](## "netrc machine")
- **OsnatelTVLive**: [*osnateltv*](## "netrc machine")
- **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine")
@@ -1136,6 +1143,7 @@
- **Radiko**
- **RadikoRadio**
- **radio.de**: (**Currently broken**)
+ - **Radio1Be**
- **radiocanada**
- **radiocanada:audiovideo**
- **RadioComercial**
@@ -1288,6 +1296,7 @@
- **SeznamZpravyArticle**
- **Shahid**: [*shahid*](## "netrc machine")
- **ShahidShow**
+ - **SharePoint**
- **ShareVideosEmbed**
- **ShemarooMe**
- **ShowRoomLive**
@@ -1387,6 +1396,10 @@
- **SztvHu**
- **t-online.de**: (**Currently broken**)
- **Tagesschau**: (**Currently broken**)
+ - **TapTapApp**
+ - **TapTapAppIntl**
+ - **TapTapMoment**
+ - **TapTapPostIntl**
- **Tass**: (**Currently broken**)
- **TBS**
- **TBSJPEpisode**
@@ -1405,7 +1418,7 @@
- **TedSeries**
- **TedTalk**
- **Tele13**
- - **Tele5**: (**Currently broken**)
+ - **Tele5**
- **TeleBruxelles**
- **TelecaribePlay**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
@@ -1445,11 +1458,12 @@
- **ThreeSpeak**
- **ThreeSpeakUser**
- **TikTok**
+ - **tiktok:collection**
- **tiktok:effect**: (**Currently broken**)
- **tiktok:live**
- **tiktok:sound**: (**Currently broken**)
- **tiktok:tag**: (**Currently broken**)
- - **tiktok:user**: (**Currently broken**)
+ - **tiktok:user**
- **TLC**
- **TMZ**
- **TNAFlix**
@@ -1494,7 +1508,7 @@
- **tv2play.hu**
- **tv2playseries.hu**
- **TV4**: tv4.se and tv4play.se
- - **TV5MondePlus**: TV5MONDE+
+ - **TV5MONDE**
- **tv5unis**
- **tv5unis:video**
- **tv8.it**
@@ -1632,8 +1646,6 @@
- **voicy**: (**Currently broken**)
- **voicy:channel**: (**Currently broken**)
- **VolejTV**
- - **Voot**: [*voot*](## "netrc machine") (**Currently broken**)
- - **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**)
- **VoxMedia**
- **VoxMediaVolume**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
@@ -1708,10 +1720,10 @@
- **wykop:​post:comment**
- **Xanimu**
- **XboxClips**
- - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
- **XHamster**
- **XHamsterEmbed**
- **XHamsterUser**
+ - **XiaoHongShu**: 小红书
- **ximalaya**: 喜马拉雅FM
- **ximalaya:album**: 喜马拉雅FM 专辑
- **xinpianchang**: xinpianchang.com (**Currently broken**)
@@ -1742,8 +1754,12 @@
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- - **YourPorn**
- - **YourUpload**
+ - **YouPornCategory**: YouPorn category, with sorting, filtering and pagination
+ - **YouPornChannel**: YouPorn channel, with sorting and pagination
+ - **YouPornCollection**: YouPorn collection (user playlist), with sorting and pagination
+ - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination
+ - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination
+ - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination
- **youtube**: YouTube
- **youtube:clip**
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
diff --git a/test/conftest.py b/test/conftest.py
index 2fbc269..decd2c8 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1,4 +1,3 @@
-import functools
import inspect
import pytest
@@ -10,7 +9,9 @@ from yt_dlp.utils._utils import _YDLLogger as FakeLogger
@pytest.fixture
def handler(request):
- RH_KEY = request.param
+ RH_KEY = getattr(request, 'param', None)
+ if not RH_KEY:
+ return
if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
handler = RH_KEY
elif RH_KEY in _REQUEST_HANDLERS:
@@ -18,9 +19,46 @@ def handler(request):
else:
pytest.skip(f'{RH_KEY} request handler is not available')
- return functools.partial(handler, logger=FakeLogger)
+ class HandlerWrapper(handler):
+ RH_KEY = handler.RH_KEY
+ def __init__(self, *args, **kwargs):
+ super().__init__(logger=FakeLogger, *args, **kwargs)
-def validate_and_send(rh, req):
- rh.validate(req)
- return rh.send(req)
+ return HandlerWrapper
+
+
+@pytest.fixture(autouse=True)
+def skip_handler(request, handler):
+ """usage: pytest.mark.skip_handler('my_handler', 'reason')"""
+ for marker in request.node.iter_markers('skip_handler'):
+ if marker.args[0] == handler.RH_KEY:
+ pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
+
+
+@pytest.fixture(autouse=True)
+def skip_handler_if(request, handler):
+ """usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason')"""
+ for marker in request.node.iter_markers('skip_handler_if'):
+ if marker.args[0] == handler.RH_KEY and marker.args[1](request):
+ pytest.skip(marker.args[2] if len(marker.args) > 2 else '')
+
+
+@pytest.fixture(autouse=True)
+def skip_handlers_if(request, handler):
+ """usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason')"""
+ for marker in request.node.iter_markers('skip_handlers_if'):
+ if handler and marker.args[0](request, handler):
+ pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
+
+
+def pytest_configure(config):
+ config.addinivalue_line(
+ "markers", "skip_handler(handler): skip test for the given handler",
+ )
+ config.addinivalue_line(
+ "markers", "skip_handler_if(handler): skip test for the given handler if condition is true"
+ )
+ config.addinivalue_line(
+ "markers", "skip_handlers_if(handler): skip test for handlers when the condition is true"
+ )
diff --git a/test/helper.py b/test/helper.py
index 7760fd8..e747312 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -338,3 +338,8 @@ def http_server_port(httpd):
def verify_address_availability(address):
if find_available_port(address) is None:
pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
+
+
+def validate_and_send(rh, req):
+ rh.validate(req)
+ return rh.send(req)
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index b7dee49..744587e 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1906,6 +1906,15 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
expected_status=TEAPOT_RESPONSE_STATUS)
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+ def test_search_nextjs_data(self):
+ data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
+ self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
+ self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
+ self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
+ self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
+ with self.assertWarns(DeprecationWarning):
+ self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 6be47af..5242cf8 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -183,7 +183,7 @@ class TestFormatSelection(unittest.TestCase):
]
info_dict = _make_result(formats)
- ydl = YDL({'format': 'best'})
+ ydl = YDL({'format': 'best', 'format_sort': ['abr', 'ext']})
ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
@@ -195,7 +195,7 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'mp3-64')
- ydl = YDL({'prefer_free_formats': True})
+ ydl = YDL({'prefer_free_formats': True, 'format_sort': ['abr', 'ext']})
ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 5282ef6..bd61f30 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -1,5 +1,5 @@
+import datetime as dt
import unittest
-from datetime import datetime, timezone
from yt_dlp import cookies
from yt_dlp.cookies import (
@@ -138,7 +138,7 @@ class TestCookies(unittest.TestCase):
self.assertEqual(cookie.name, 'foo')
self.assertEqual(cookie.value, 'test%20%3Bcookie')
self.assertFalse(cookie.secure)
- expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc)
+ expected_expiration = dt.datetime(2021, 6, 18, 21, 39, 19, tzinfo=dt.timezone.utc)
self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))
def test_pbkdf2_sha1(self):
diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py
new file mode 100644
index 0000000..1b21fe7
--- /dev/null
+++ b/test/test_http_proxy.py
@@ -0,0 +1,380 @@
+import abc
+import base64
+import contextlib
+import functools
+import json
+import os
+import random
+import ssl
+import threading
+from http.server import BaseHTTPRequestHandler
+from socketserver import ThreadingTCPServer
+
+import pytest
+
+from test.helper import http_server_port, verify_address_availability
+from test.test_networking import TEST_DIR
+from test.test_socks import IPv6ThreadingTCPServer
+from yt_dlp.dependencies import urllib3
+from yt_dlp.networking import Request
+from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError
+
+
+class HTTPProxyAuthMixin:
+
+ def proxy_auth_error(self):
+ self.send_response(407)
+ self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
+ self.end_headers()
+ return False
+
+ def do_proxy_auth(self, username, password):
+ if username is None and password is None:
+ return True
+
+ proxy_auth_header = self.headers.get('Proxy-Authorization', None)
+ if proxy_auth_header is None:
+ return self.proxy_auth_error()
+
+ if not proxy_auth_header.startswith('Basic '):
+ return self.proxy_auth_error()
+
+ auth = proxy_auth_header[6:]
+
+ try:
+ auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1)
+ except Exception:
+ return self.proxy_auth_error()
+
+ if auth_username != (username or '') or auth_password != (password or ''):
+ return self.proxy_auth_error()
+ return True
+
+
+class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
+ def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
+ self.username = username
+ self.password = password
+ self.proxy_info = proxy_info
+ super().__init__(*args, **kwargs)
+
+ def do_GET(self):
+ if not self.do_proxy_auth(self.username, self.password):
+ self.server.close_request(self.request)
+ return
+ if self.path.endswith('/proxy_info'):
+ payload = json.dumps(self.proxy_info or {
+ 'client_address': self.client_address,
+ 'connect': False,
+ 'connect_host': None,
+ 'connect_port': None,
+ 'headers': dict(self.headers),
+ 'path': self.path,
+ 'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
+ })
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload.encode())
+ else:
+ self.send_response(404)
+ self.end_headers()
+
+ self.server.close_request(self.request)
+
+
+if urllib3:
+ import urllib3.util.ssltransport
+
+ class SSLTransport(urllib3.util.ssltransport.SSLTransport):
+ """
+ Modified version of urllib3 SSLTransport to support server side SSL
+
+ This allows us to chain multiple TLS connections.
+ """
+
+ def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
+ self.incoming = ssl.MemoryBIO()
+ self.outgoing = ssl.MemoryBIO()
+
+ self.suppress_ragged_eofs = suppress_ragged_eofs
+ self.socket = socket
+
+ self.sslobj = ssl_context.wrap_bio(
+ self.incoming,
+ self.outgoing,
+ server_hostname=server_hostname,
+ server_side=server_side
+ )
+ self._ssl_io_loop(self.sslobj.do_handshake)
+
+ @property
+ def _io_refs(self):
+ return self.socket._io_refs
+
+ @_io_refs.setter
+ def _io_refs(self, value):
+ self.socket._io_refs = value
+
+ def shutdown(self, *args, **kwargs):
+ self.socket.shutdown(*args, **kwargs)
+else:
+ SSLTransport = None
+
+
+class HTTPSProxyHandler(HTTPProxyHandler):
+ def __init__(self, request, *args, **kwargs):
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.load_cert_chain(certfn, None)
+ if isinstance(request, ssl.SSLSocket):
+ request = SSLTransport(request, ssl_context=sslctx, server_side=True)
+ else:
+ request = sslctx.wrap_socket(request, server_side=True)
+ super().__init__(request, *args, **kwargs)
+
+
+class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
+ protocol_version = 'HTTP/1.1'
+ default_request_version = 'HTTP/1.1'
+
+ def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
+ self.username = username
+ self.password = password
+ self.request_handler = request_handler
+ super().__init__(*args, **kwargs)
+
+ def do_CONNECT(self):
+ if not self.do_proxy_auth(self.username, self.password):
+ self.server.close_request(self.request)
+ return
+ self.send_response(200)
+ self.end_headers()
+ proxy_info = {
+ 'client_address': self.client_address,
+ 'connect': True,
+ 'connect_host': self.path.split(':')[0],
+ 'connect_port': int(self.path.split(':')[1]),
+ 'headers': dict(self.headers),
+ 'path': self.path,
+ 'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
+ }
+ self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
+ self.server.close_request(self.request)
+
+
+class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
+ def __init__(self, request, *args, **kwargs):
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.load_cert_chain(certfn, None)
+ request = sslctx.wrap_socket(request, server_side=True)
+ self._original_request = request
+ super().__init__(request, *args, **kwargs)
+
+ def do_CONNECT(self):
+ super().do_CONNECT()
+ self.server.close_request(self._original_request)
+
+
+@contextlib.contextmanager
+def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
+ server = server_thread = None
+ try:
+ bind_address = bind_ip or '127.0.0.1'
+ server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer
+ server = server_type(
+ (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs))
+ server_port = http_server_port(server)
+ server_thread = threading.Thread(target=server.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+ if '.' not in bind_address:
+ yield f'[{bind_address}]:{server_port}'
+ else:
+ yield f'{bind_address}:{server_port}'
+ finally:
+ server.shutdown()
+ server.server_close()
+ server_thread.join(2.0)
+
+
+class HTTPProxyTestContext(abc.ABC):
+ REQUEST_HANDLER_CLASS = None
+ REQUEST_PROTO = None
+
+ def http_server(self, server_class, *args, **kwargs):
+ return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs)
+
+ @abc.abstractmethod
+ def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
+ """return a dict of proxy_info"""
+
+
+class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
+ # Standard HTTP Proxy for http requests
+ REQUEST_HANDLER_CLASS = HTTPProxyHandler
+ REQUEST_PROTO = 'http'
+
+ def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
+ request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
+ handler.validate(request)
+ return json.loads(handler.send(request).read().decode())
+
+
+class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
+ # HTTP Connect proxy, for https requests
+ REQUEST_HANDLER_CLASS = HTTPSProxyHandler
+ REQUEST_PROTO = 'https'
+
+ def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
+ request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
+ handler.validate(request)
+ return json.loads(handler.send(request).read().decode())
+
+
+CTX_MAP = {
+ 'http': HTTPProxyHTTPTestContext,
+ 'https': HTTPProxyHTTPSTestContext,
+}
+
+
+@pytest.fixture(scope='module')
+def ctx(request):
+ return CTX_MAP[request.param]()
+
+
+@pytest.mark.parametrize(
+ 'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
+@pytest.mark.parametrize('ctx', ['http'], indirect=True) # pure http proxy can only support http
+class TestHTTPProxy:
+ def test_http_no_auth(self, handler, ctx):
+ with ctx.http_server(HTTPProxyHandler) as server_address:
+ with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert proxy_info['connect'] is False
+ assert 'Proxy-Authorization' not in proxy_info['headers']
+
+ def test_http_auth(self, handler, ctx):
+ with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
+ with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert 'Proxy-Authorization' in proxy_info['headers']
+
+ def test_http_bad_auth(self, handler, ctx):
+ with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
+ with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
+ with pytest.raises(HTTPError) as exc_info:
+ ctx.proxy_info_request(rh)
+ assert exc_info.value.response.status == 407
+ exc_info.value.response.close()
+
+ def test_http_source_address(self, handler, ctx):
+ with ctx.http_server(HTTPProxyHandler) as server_address:
+ source_address = f'127.0.0.{random.randint(5, 255)}'
+ verify_address_availability(source_address)
+ with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
+ source_address=source_address) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert proxy_info['client_address'][0] == source_address
+
+ @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
+ def test_https(self, handler, ctx):
+ with ctx.http_server(HTTPSProxyHandler) as server_address:
+ with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert proxy_info['connect'] is False
+ assert 'Proxy-Authorization' not in proxy_info['headers']
+
+ @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
+ def test_https_verify_failed(self, handler, ctx):
+ with ctx.http_server(HTTPSProxyHandler) as server_address:
+ with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+ # Accept SSLError as may not be feasible to tell if it is proxy or request error.
+ # note: if request proto also does ssl verification, this may also be the error of the request.
+ # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
+ with pytest.raises((ProxyError, SSLError)):
+ ctx.proxy_info_request(rh)
+
+ def test_http_with_idn(self, handler, ctx):
+ with ctx.http_server(HTTPProxyHandler) as server_address:
+ with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw')
+ assert proxy_info['proxy'] == server_address
+ assert proxy_info['path'].startswith('http://xn--fiq228c.tw')
+ assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw'
+
+
+@pytest.mark.parametrize(
+ 'handler,ctx', [
+ ('Requests', 'https'),
+ ('CurlCFFI', 'https'),
+ ], indirect=True)
+class TestHTTPConnectProxy:
+ def test_http_connect_no_auth(self, handler, ctx):
+ with ctx.http_server(HTTPConnectProxyHandler) as server_address:
+ with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert proxy_info['connect'] is True
+ assert 'Proxy-Authorization' not in proxy_info['headers']
+
+ def test_http_connect_auth(self, handler, ctx):
+ with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
+ with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert 'Proxy-Authorization' in proxy_info['headers']
+
+ @pytest.mark.skip_handler(
+ 'Requests',
+ 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374'
+ )
+ def test_http_connect_bad_auth(self, handler, ctx):
+ with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
+ with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
+ with pytest.raises(ProxyError):
+ ctx.proxy_info_request(rh)
+
+ def test_http_connect_source_address(self, handler, ctx):
+ with ctx.http_server(HTTPConnectProxyHandler) as server_address:
+ source_address = f'127.0.0.{random.randint(5, 255)}'
+ verify_address_availability(source_address)
+ with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
+ source_address=source_address,
+ verify=False) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert proxy_info['client_address'][0] == source_address
+
+ @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
+ def test_https_connect_proxy(self, handler, ctx):
+ with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
+ with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert proxy_info['connect'] is True
+ assert 'Proxy-Authorization' not in proxy_info['headers']
+
+ @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
+ def test_https_connect_verify_failed(self, handler, ctx):
+ with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
+ with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
+ # Accept SSLError as may not be feasible to tell if it is proxy or request error.
+ # note: if request proto also does ssl verification, this may also be the error of the request.
+ # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
+ with pytest.raises((ProxyError, SSLError)):
+ ctx.proxy_info_request(rh)
+
+ @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
+ def test_https_connect_proxy_auth(self, handler, ctx):
+ with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address:
+ with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh:
+ proxy_info = ctx.proxy_info_request(rh)
+ assert proxy_info['proxy'] == server_address
+ assert 'Proxy-Authorization' in proxy_info['headers']
diff --git a/test/test_networking.py b/test/test_networking.py
index 628f1f1..d127cbb 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -6,6 +6,8 @@ import sys
import pytest
+from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import gzip
@@ -27,9 +29,14 @@ import zlib
from email.message import Message
from http.cookiejar import CookieJar
-from test.helper import FakeYDL, http_server_port, verify_address_availability
+from test.helper import (
+ FakeYDL,
+ http_server_port,
+ validate_and_send,
+ verify_address_availability,
+)
from yt_dlp.cookies import YoutubeDLCookieJar
-from yt_dlp.dependencies import brotli, requests, urllib3
+from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
from yt_dlp.networking import (
HEADRequest,
PUTRequest,
@@ -50,31 +57,20 @@ from yt_dlp.networking.exceptions import (
TransportError,
UnsupportedRequest,
)
+from yt_dlp.networking.impersonate import (
+ ImpersonateRequestHandler,
+ ImpersonateTarget,
+)
+from yt_dlp.utils import YoutubeDLError
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
-from yt_dlp.utils.networking import HTTPHeaderDict
-
-from test.conftest import validate_and_send
+from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def _build_proxy_handler(name):
- class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
- proxy_name = name
-
- def log_message(self, format, *args):
- pass
-
- def do_GET(self):
- self.send_response(200)
- self.send_header('Content-Type', 'text/plain; charset=utf-8')
- self.end_headers()
- self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
- return HTTPTestRequestHandler
-
-
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
protocol_version = 'HTTP/1.1'
+ default_request_version = 'HTTP/1.1'
def log_message(self, format, *args):
pass
@@ -112,6 +108,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
def _read_data(self):
if 'Content-Length' in self.headers:
return self.rfile.read(int(self.headers['Content-Length']))
+ else:
+ return b''
def do_POST(self):
data = self._read_data() + str(self.headers).encode()
@@ -195,7 +193,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
self._headers()
elif self.path.startswith('/308-to-headers'):
self.send_response(308)
- self.send_header('Location', '/headers')
+ # redirect to "localhost" for testing cookie redirection handling
+ self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path == '/trailing_garbage':
@@ -309,8 +308,9 @@ class TestRequestHandlerBase:
cls.https_server_thread.start()
+@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
class TestHTTPRequestHandler(TestRequestHandlerBase):
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+
def test_verify_cert(self, handler):
with handler() as rh:
with pytest.raises(CertificateVerifyError):
@@ -321,7 +321,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert r.status == 200
r.close()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_ssl_error(self, handler):
# HTTPS server with too old TLS version
# XXX: is there a better way to test this than to create a new server?
@@ -335,11 +334,10 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
https_server_thread.start()
with handler(verify=False) as rh:
- with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
+ with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info:
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
assert not issubclass(exc_info.type, CertificateVerifyError)
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_percent_encode(self, handler):
with handler() as rh:
# Unicode characters should be encoded with uppercase percent-encoding
@@ -351,7 +349,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.status == 200
res.close()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
@pytest.mark.parametrize('path', [
'/a/b/./../../headers',
'/redirect_dotsegments',
@@ -367,14 +364,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
def test_unicode_path_redirection(self, handler):
with handler() as rh:
r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
r.close()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_raise_http_error(self, handler):
with handler() as rh:
for bad_status in (400, 500, 599, 302):
@@ -384,7 +380,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
# Should not raise an error
validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_response_url(self, handler):
with handler() as rh:
# Response url should be that of the last url in redirect chain
@@ -395,62 +390,48 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
res2.close()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
- def test_redirect(self, handler):
+ # Covers some basic cases we expect some level of consistency between request handlers for
+ @pytest.mark.parametrize('redirect_status,method,expected', [
+ # A 303 must either use GET or HEAD for subsequent request
+ (303, 'POST', ('', 'GET', False)),
+ (303, 'HEAD', ('', 'HEAD', False)),
+
+ # 301 and 302 turn POST only into a GET
+ (301, 'POST', ('', 'GET', False)),
+ (301, 'HEAD', ('', 'HEAD', False)),
+ (302, 'POST', ('', 'GET', False)),
+ (302, 'HEAD', ('', 'HEAD', False)),
+
+ # 307 and 308 should not change method
+ (307, 'POST', ('testdata', 'POST', True)),
+ (308, 'POST', ('testdata', 'POST', True)),
+ (307, 'HEAD', ('', 'HEAD', False)),
+ (308, 'HEAD', ('', 'HEAD', False)),
+ ])
+ def test_redirect(self, handler, redirect_status, method, expected):
with handler() as rh:
- def do_req(redirect_status, method, assert_no_content=False):
- data = b'testdata' if method in ('POST', 'PUT') else None
- res = validate_and_send(
- rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
-
- headers = b''
- data_sent = b''
- if data is not None:
- data_sent += res.read(len(data))
- if data_sent != data:
- headers += data_sent
- data_sent = b''
-
- headers += res.read()
-
- if assert_no_content or data is None:
- assert b'Content-Type' not in headers
- assert b'Content-Length' not in headers
- else:
- assert b'Content-Type' in headers
- assert b'Content-Length' in headers
-
- return data_sent.decode(), res.headers.get('method', '')
-
- # A 303 must either use GET or HEAD for subsequent request
- assert do_req(303, 'POST', True) == ('', 'GET')
- assert do_req(303, 'HEAD') == ('', 'HEAD')
-
- assert do_req(303, 'PUT', True) == ('', 'GET')
-
- # 301 and 302 turn POST only into a GET
- assert do_req(301, 'POST', True) == ('', 'GET')
- assert do_req(301, 'HEAD') == ('', 'HEAD')
- assert do_req(302, 'POST', True) == ('', 'GET')
- assert do_req(302, 'HEAD') == ('', 'HEAD')
-
- assert do_req(301, 'PUT') == ('testdata', 'PUT')
- assert do_req(302, 'PUT') == ('testdata', 'PUT')
+ data = b'testdata' if method == 'POST' else None
+ headers = {}
+ if data is not None:
+ headers['Content-Type'] = 'application/test'
+ res = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data,
+ headers=headers))
- # 307 and 308 should not change method
- for m in ('POST', 'PUT'):
- assert do_req(307, m) == ('testdata', m)
- assert do_req(308, m) == ('testdata', m)
+ headers = b''
+ data_recv = b''
+ if data is not None:
+ data_recv += res.read(len(data))
+ if data_recv != data:
+ headers += data_recv
+ data_recv = b''
- assert do_req(307, 'HEAD') == ('', 'HEAD')
- assert do_req(308, 'HEAD') == ('', 'HEAD')
+ headers += res.read()
- # These should not redirect and instead raise an HTTPError
- for code in (300, 304, 305, 306):
- with pytest.raises(HTTPError):
- do_req(code, 'GET')
+ assert expected[0] == data_recv.decode()
+ assert expected[1] == res.headers.get('method')
+ assert expected[2] == ('content-length' in headers.decode().lower())
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_request_cookie_header(self, handler):
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
with handler() as rh:
@@ -459,16 +440,17 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
rh, Request(
f'http://127.0.0.1:{self.http_port}/headers',
headers={'Cookie': 'test=test'})).read().decode()
- assert 'Cookie: test=test' in res
+ assert 'cookie: test=test' in res.lower()
# Specified Cookie header should be removed on any redirect
res = validate_and_send(
rh, Request(
f'http://127.0.0.1:{self.http_port}/308-to-headers',
- headers={'Cookie': 'test=test'})).read().decode()
- assert 'Cookie: test=test' not in res
+ headers={'Cookie': 'test=test2'})).read().decode()
+ assert 'cookie: test=test2' not in res.lower()
# Specified Cookie header should override global cookiejar for that request
+ # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now
cookiejar = YoutubeDLCookieJar()
cookiejar.set_cookie(http.cookiejar.Cookie(
version=0, name='test', value='ytdlp', port=None, port_specified=False,
@@ -478,23 +460,20 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
with handler(cookiejar=cookiejar) as rh:
data = validate_and_send(
- rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read()
- assert b'Cookie: test=ytdlp' not in data
- assert b'Cookie: test=test' in data
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read()
+ assert b'cookie: test=ytdlp' not in data.lower()
+ assert b'cookie: test=test3' in data.lower()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_redirect_loop(self, handler):
with handler() as rh:
with pytest.raises(HTTPError, match='redirect loop'):
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_incompleteread(self, handler):
with handler(timeout=2) as rh:
- with pytest.raises(IncompleteRead):
+ with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_cookies(self, handler):
cookiejar = YoutubeDLCookieJar()
cookiejar.set_cookie(http.cookiejar.Cookie(
@@ -503,47 +482,59 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
with handler(cookiejar=cookiejar) as rh:
data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
- assert b'Cookie: test=ytdlp' in data
+ assert b'cookie: test=ytdlp' in data.lower()
# Per request
with handler() as rh:
data = validate_and_send(
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
- assert b'Cookie: test=ytdlp' in data
+ assert b'cookie: test=ytdlp' in data.lower()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_headers(self, handler):
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
# Global Headers
- data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
- assert b'Test1: test' in data
+ data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower()
+ assert b'test1: test' in data
# Per request headers, merged with global
data = validate_and_send(rh, Request(
- f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read()
- assert b'Test1: test' in data
- assert b'Test2: changed' in data
- assert b'Test2: test2' not in data
- assert b'Test3: test3' in data
-
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
- def test_timeout(self, handler):
+ f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower()
+ assert b'test1: test' in data
+ assert b'test2: changed' in data
+ assert b'test2: test2' not in data
+ assert b'test3: test3' in data
+
+ def test_read_timeout(self, handler):
with handler() as rh:
# Default timeout is 20 seconds, so this should go through
validate_and_send(
- rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3'))
+ rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
- with handler(timeout=0.5) as rh:
+ with handler(timeout=0.1) as rh:
with pytest.raises(TransportError):
validate_and_send(
- rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
+ rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5'))
# Per request timeout, should override handler timeout
validate_and_send(
rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ def test_connect_timeout(self, handler):
+ # nothing should be listening on this port
+ connect_timeout_url = 'http://10.255.255.255'
+ with handler(timeout=0.01) as rh, pytest.raises(TransportError):
+ now = time.time()
+ validate_and_send(rh, Request(connect_timeout_url))
+ assert time.time() - now < DEFAULT_TIMEOUT
+
+ # Per request timeout, should override handler timeout
+ request = Request(connect_timeout_url, extensions={'timeout': 0.01})
+ with handler() as rh, pytest.raises(TransportError):
+ now = time.time()
+ validate_and_send(rh, request)
+ assert time.time() - now < DEFAULT_TIMEOUT
+
def test_source_address(self, handler):
source_address = f'127.0.0.{random.randint(5, 255)}'
# on some systems these loopback addresses we need for testing may not be available
@@ -554,13 +545,14 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
assert source_address == data
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ # Not supported by CurlCFFI
+ @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
def test_gzip_trailing_garbage(self, handler):
with handler() as rh:
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
assert data == '<html><video src="/vid.mp4" /></html>'
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ @pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
def test_brotli(self, handler):
with handler() as rh:
@@ -571,7 +563,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == 'br'
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_deflate(self, handler):
with handler() as rh:
res = validate_and_send(
@@ -581,7 +572,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == 'deflate'
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_gzip(self, handler):
with handler() as rh:
res = validate_and_send(
@@ -591,7 +581,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == 'gzip'
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_multiple_encodings(self, handler):
with handler() as rh:
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
@@ -602,17 +591,16 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.headers.get('Content-Encoding') == pair
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
def test_unsupported_encoding(self, handler):
with handler() as rh:
res = validate_and_send(
rh, Request(
f'http://127.0.0.1:{self.http_port}/content-encoding',
- headers={'ytdl-encoding': 'unsupported'}))
+ headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'}))
assert res.headers.get('Content-Encoding') == 'unsupported'
assert res.read() == b'raw'
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_read(self, handler):
with handler() as rh:
res = validate_and_send(
@@ -620,85 +608,52 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
assert res.readable()
assert res.read(1) == b'H'
assert res.read(3) == b'ost'
+ assert res.read().decode().endswith('\n\n')
+ assert res.read() == b''
+
+ def test_request_disable_proxy(self, handler):
+ for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
+ # Given the handler is configured with a proxy
+ with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
+ # When a proxy is explicitly set to None for the request
+ res = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None}))
+ # Then no proxy should be used
+ res.close()
+ assert res.status == 200
-
-class TestHTTPProxy(TestRequestHandlerBase):
- @classmethod
- def setup_class(cls):
- super().setup_class()
- # HTTP Proxy server
- cls.proxy = http.server.ThreadingHTTPServer(
- ('127.0.0.1', 0), _build_proxy_handler('normal'))
- cls.proxy_port = http_server_port(cls.proxy)
- cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
- cls.proxy_thread.daemon = True
- cls.proxy_thread.start()
-
- # Geo proxy server
- cls.geo_proxy = http.server.ThreadingHTTPServer(
- ('127.0.0.1', 0), _build_proxy_handler('geo'))
- cls.geo_port = http_server_port(cls.geo_proxy)
- cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
- cls.geo_proxy_thread.daemon = True
- cls.geo_proxy_thread.start()
-
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
- def test_http_proxy(self, handler):
- http_proxy = f'http://127.0.0.1:{self.proxy_port}'
- geo_proxy = f'http://127.0.0.1:{self.geo_port}'
-
- # Test global http proxy
- # Test per request http proxy
- # Test per request http proxy disables proxy
- url = 'http://foo.com/bar'
-
- # Global HTTP proxy
- with handler(proxies={'http': http_proxy}) as rh:
- res = validate_and_send(rh, Request(url)).read().decode()
- assert res == f'normal: {url}'
-
- # Per request proxy overrides global
- res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
- assert res == f'geo: {url}'
-
- # and setting to None disables all proxies for that request
- real_url = f'http://127.0.0.1:{self.http_port}/headers'
- res = validate_and_send(
- rh, Request(real_url, proxies={'http': None})).read().decode()
- assert res != f'normal: {real_url}'
- assert 'Accept' in res
-
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ @pytest.mark.skip_handlers_if(
+ lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
def test_noproxy(self, handler):
- with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
- # NO_PROXY
- for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
- nop_response = validate_and_send(
- rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
- 'utf-8')
- assert 'Accept' in nop_response
-
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+ for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
+ # Given the handler is configured with a proxy
+ with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
+ for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
+ # When request no proxy includes the request url host
+ nop_response = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy}))
+ # Then the proxy should not be used
+ assert nop_response.status == 200
+ nop_response.close()
+
+ @pytest.mark.skip_handlers_if(
+ lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
def test_allproxy(self, handler):
- url = 'http://foo.com/bar'
- with handler() as rh:
- response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
- 'utf-8')
- assert response == f'normal: {url}'
+ # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
+ # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
+ with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh:
+ with pytest.raises(TransportError):
+ validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
- def test_http_proxy_with_idn(self, handler):
- with handler(proxies={
- 'http': f'http://127.0.0.1:{self.proxy_port}',
- }) as rh:
- url = 'http://中文.tw/'
- response = rh.send(Request(url)).read().decode()
- # b'xn--fiq228c' is '中文'.encode('idna')
- assert response == 'normal: http://xn--fiq228c.tw/'
+ with handler(timeout=0.1) as rh:
+ with pytest.raises(TransportError):
+ validate_and_send(
+ rh, Request(
+ f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close()
+@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
class TestClientCertificate:
-
@classmethod
def setup_class(cls):
certfn = os.path.join(TEST_DIR, 'testcert.pem')
@@ -724,27 +679,23 @@ class TestClientCertificate:
) as rh:
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_combined_nopass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
})
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_nocombined_nopass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'client.crt'),
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
})
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_combined_pass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
'client_certificate_password': 'foobar',
})
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_certificate_nocombined_pass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'client.crt'),
@@ -753,6 +704,37 @@ class TestClientCertificate:
})
+@pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
+class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
+ def test_supported_impersonate_targets(self, handler):
+ with handler(headers=std_headers) as rh:
+ # note: this assumes the impersonate request handler supports the impersonate extension
+ for target in rh.supported_targets:
+ res = validate_and_send(rh, Request(
+ f'http://127.0.0.1:{self.http_port}/headers', extensions={'impersonate': target}))
+ assert res.status == 200
+ assert std_headers['user-agent'].lower() not in res.read().decode().lower()
+
+ def test_response_extensions(self, handler):
+ with handler() as rh:
+ for target in rh.supported_targets:
+ request = Request(
+ f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
+ res = validate_and_send(rh, request)
+ assert res.extensions['impersonate'] == rh._get_request_target(request)
+
+ def test_http_error_response_extensions(self, handler):
+ with handler() as rh:
+ for target in rh.supported_targets:
+ request = Request(
+ f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
+ try:
+ validate_and_send(rh, request)
+ except HTTPError as e:
+ res = e.response
+ assert res.extensions['impersonate'] == rh._get_request_target(request)
+
+
class TestRequestHandlerMisc:
"""Misc generic tests for request handlers, not related to request or validation testing"""
@pytest.mark.parametrize('handler,logger_name', [
@@ -772,8 +754,8 @@ class TestRequestHandlerMisc:
assert len(logging_handlers) == before_count
+@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
class TestUrllibRequestHandler(TestRequestHandlerBase):
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_file_urls(self, handler):
# See https://github.com/ytdl-org/youtube-dl/issues/8227
tf = tempfile.NamedTemporaryFile(delete=False)
@@ -795,7 +777,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
os.unlink(tf.name)
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_http_error_returns_content(self, handler):
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
def get_response():
@@ -808,7 +789,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
assert get_response().read() == b'<html></html>'
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_verify_cert_error_text(self, handler):
# Check the output of the error message
with handler() as rh:
@@ -818,7 +798,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
):
validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
- @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
@pytest.mark.parametrize('req,match,version_check', [
# https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
# bpo-39603: Check implemented in 3.7.9+, 3.8.5+
@@ -931,6 +910,172 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
assert called
+@pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
+class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
+
+ @pytest.mark.parametrize('params,extensions', [
+ ({}, {'impersonate': ImpersonateTarget('chrome')}),
+ ({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
+ ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
+ ])
+ def test_impersonate(self, handler, params, extensions):
+ with handler(headers=std_headers, **params) as rh:
+ res = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions=extensions)).read().decode()
+ assert 'sec-ch-ua: "Chromium";v="110"' in res
+ # Check that user agent is added over ours
+ assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res
+
+ def test_headers(self, handler):
+ with handler(headers=std_headers) as rh:
+ # Ensure curl-impersonate overrides our standard headers (usually added
+ res = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={
+ 'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower()
+
+ assert std_headers['user-agent'].lower() not in res
+ assert std_headers['accept-language'].lower() not in res
+ assert std_headers['sec-fetch-mode'].lower() not in res
+ # other than UA, custom headers that differ from std_headers should be kept
+ assert 'sec-fetch-mode: custom' in res
+ assert 'x-custom: test' in res
+ # but when not impersonating don't remove std_headers
+ res = validate_and_send(
+ rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower()
+ # std_headers should be present
+ for k, v in std_headers.items():
+ assert f'{k}: {v}'.lower() in res
+
+ @pytest.mark.parametrize('raised,expected,match', [
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None),
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.RECV_ERROR), TransportError, None),
+ ])
+ def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
+ import curl_cffi.requests
+
+ from yt_dlp.networking._curlcffi import CurlCFFIResponseAdapter
+ curl_res = curl_cffi.requests.Response()
+ res = CurlCFFIResponseAdapter(curl_res)
+
+ def mock_read(*args, **kwargs):
+ try:
+ raise raised()
+ except Exception as e:
+ e.response = curl_res
+ raise
+ monkeypatch.setattr(res.fp, 'read', mock_read)
+
+ with pytest.raises(expected, match=match) as exc_info:
+ res.read()
+
+ assert exc_info.type is expected
+
+ @pytest.mark.parametrize('raised,expected,match', [
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.PEER_FAILED_VERIFICATION), CertificateVerifyError, None),
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.SSL_CONNECT_ERROR), SSLError, None),
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.TOO_MANY_REDIRECTS), HTTPError, None),
+ (lambda: curl_cffi.requests.errors.RequestsError(
+ '', code=curl_cffi.const.CurlECode.PROXY), ProxyError, None),
+ ])
+ def test_request_error_mapping(self, handler, monkeypatch, raised, expected, match):
+ import curl_cffi.requests
+ curl_res = curl_cffi.requests.Response()
+ curl_res.status_code = 301
+
+ with handler() as rh:
+ original_get_instance = rh._get_instance
+
+ def mock_get_instance(*args, **kwargs):
+ instance = original_get_instance(*args, **kwargs)
+
+ def request(*_, **__):
+ try:
+ raise raised()
+ except Exception as e:
+ e.response = curl_res
+ raise
+ monkeypatch.setattr(instance, 'request', request)
+ return instance
+
+ monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
+
+ with pytest.raises(expected) as exc_info:
+ rh.send(Request('http://fake'))
+
+ assert exc_info.type is expected
+
+ def test_response_reader(self, handler):
+ class FakeResponse:
+ def __init__(self, raise_error=False):
+ self.raise_error = raise_error
+ self.closed = False
+
+ def iter_content(self):
+ yield b'foo'
+ yield b'bar'
+ yield b'z'
+ if self.raise_error:
+ raise Exception('test')
+
+ def close(self):
+ self.closed = True
+
+ from yt_dlp.networking._curlcffi import CurlCFFIResponseReader
+
+ res = CurlCFFIResponseReader(FakeResponse())
+ assert res.readable
+ assert res.bytes_read == 0
+ assert res.read(1) == b'f'
+ assert res.bytes_read == 3
+ assert res._buffer == b'oo'
+
+ assert res.read(2) == b'oo'
+ assert res.bytes_read == 3
+ assert res._buffer == b''
+
+ assert res.read(2) == b'ba'
+ assert res.bytes_read == 6
+ assert res._buffer == b'r'
+
+ assert res.read(3) == b'rz'
+ assert res.bytes_read == 7
+ assert res._buffer == b''
+ assert res.closed
+ assert res._response.closed
+
+ # should handle no size param
+ res2 = CurlCFFIResponseReader(FakeResponse())
+ assert res2.read() == b'foobarz'
+ assert res2.bytes_read == 7
+ assert res2._buffer == b''
+ assert res2.closed
+
+ # should close on an exception
+ res3 = CurlCFFIResponseReader(FakeResponse(raise_error=True))
+ with pytest.raises(Exception, match='test'):
+ res3.read()
+ assert res3._buffer == b''
+ assert res3.bytes_read == 7
+ assert res3.closed
+
+ # buffer should be cleared on close
+ res4 = CurlCFFIResponseReader(FakeResponse())
+ res4.read(2)
+ assert res4._buffer == b'o'
+ res4.close()
+ assert res4.closed
+ assert res4._buffer == b''
+
+
def run_validation(handler, error, req, **handler_kwargs):
with handler(**handler_kwargs) as rh:
if error:
@@ -975,12 +1120,16 @@ class TestRequestHandlerValidation:
('ws', False, {}),
('wss', False, {}),
]),
+ ('CurlCFFI', [
+ ('http', False, {}),
+ ('https', False, {}),
+ ]),
(NoCheckRH, [('http', False, {})]),
(ValidationRH, [('http', UnsupportedRequest, {})])
]
PROXY_SCHEME_TESTS = [
- # scheme, expected to fail
+ # proxy scheme, expected to fail
('Urllib', 'http', [
('http', False),
('https', UnsupportedRequest),
@@ -998,26 +1147,49 @@ class TestRequestHandlerValidation:
('socks5', False),
('socks5h', False),
]),
+ ('CurlCFFI', 'http', [
+ ('http', False),
+ ('https', False),
+ ('socks4', False),
+ ('socks4a', False),
+ ('socks5', False),
+ ('socks5h', False),
+ ]),
+ ('Websockets', 'ws', [
+ ('http', UnsupportedRequest),
+ ('https', UnsupportedRequest),
+ ('socks4', False),
+ ('socks4a', False),
+ ('socks5', False),
+ ('socks5h', False),
+ ]),
(NoCheckRH, 'http', [('http', False)]),
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
- ('Websockets', 'ws', [('http', UnsupportedRequest)]),
(NoCheckRH, 'http', [('http', False)]),
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
]
PROXY_KEY_TESTS = [
- # key, expected to fail
- ('Urllib', [
- ('all', False),
- ('unrelated', False),
+ # proxy key, proxy scheme, expected to fail
+ ('Urllib', 'http', [
+ ('all', 'http', False),
+ ('unrelated', 'http', False),
]),
- ('Requests', [
- ('all', False),
- ('unrelated', False),
+ ('Requests', 'http', [
+ ('all', 'http', False),
+ ('unrelated', 'http', False),
+ ]),
+ ('CurlCFFI', 'http', [
+ ('all', 'http', False),
+ ('unrelated', 'http', False),
+ ]),
+ ('Websockets', 'ws', [
+ ('all', 'socks5', False),
+ ('unrelated', 'socks5', False),
]),
- (NoCheckRH, [('all', False)]),
- (HTTPSupportedRH, [('all', UnsupportedRequest)]),
- (HTTPSupportedRH, [('no', UnsupportedRequest)]),
+ (NoCheckRH, 'http', [('all', 'http', False)]),
+ (HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
+ (HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
]
EXTENSION_TESTS = [
@@ -1036,6 +1208,19 @@ class TestRequestHandlerValidation:
({'timeout': 'notatimeout'}, AssertionError),
({'unsupported': 'value'}, UnsupportedRequest),
]),
+ ('CurlCFFI', 'http', [
+ ({'cookiejar': 'notacookiejar'}, AssertionError),
+ ({'cookiejar': YoutubeDLCookieJar()}, False),
+ ({'timeout': 1}, False),
+ ({'timeout': 'notatimeout'}, AssertionError),
+ ({'unsupported': 'value'}, UnsupportedRequest),
+ ({'impersonate': ImpersonateTarget('badtarget', None, None, None)}, UnsupportedRequest),
+ ({'impersonate': 123}, AssertionError),
+ ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False),
+ ({'impersonate': ImpersonateTarget(None, None, None, None)}, False),
+ ({'impersonate': ImpersonateTarget()}, False),
+ ({'impersonate': 'chrome'}, AssertionError)
+ ]),
(NoCheckRH, 'http', [
({'cookiejar': 'notacookiejar'}, False),
({'somerandom': 'test'}, False), # but any extension is allowed through
@@ -1046,28 +1231,54 @@ class TestRequestHandlerValidation:
]),
]
+ @pytest.mark.parametrize('handler,fail,scheme', [
+ ('Urllib', False, 'http'),
+ ('Requests', False, 'http'),
+ ('CurlCFFI', False, 'http'),
+ ('Websockets', False, 'ws')
+ ], indirect=['handler'])
+ def test_no_proxy(self, handler, fail, scheme):
+ run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
+ run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})
+
+ @pytest.mark.parametrize('handler,scheme', [
+ ('Urllib', 'http'),
+ (HTTPSupportedRH, 'http'),
+ ('Requests', 'http'),
+ ('CurlCFFI', 'http'),
+ ('Websockets', 'ws')
+ ], indirect=['handler'])
+ def test_empty_proxy(self, handler, scheme):
+ run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None}))
+ run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None})
+
+ @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
+ @pytest.mark.parametrize('handler,scheme', [
+ ('Urllib', 'http'),
+ (HTTPSupportedRH, 'http'),
+ ('Requests', 'http'),
+ ('CurlCFFI', 'http'),
+ ('Websockets', 'ws')
+ ], indirect=['handler'])
+ def test_invalid_proxy_url(self, handler, scheme, proxy_url):
+ run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url}))
+
@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
(handler_tests[0], scheme, fail, handler_kwargs)
for handler_tests in URL_SCHEME_TESTS
for scheme, fail, handler_kwargs in handler_tests[1]
-
], indirect=['handler'])
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
- @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler'])
- def test_no_proxy(self, handler, fail):
- run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
- run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
-
- @pytest.mark.parametrize('handler,proxy_key,fail', [
- (handler_tests[0], proxy_key, fail)
+ @pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
+ (handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
for handler_tests in PROXY_KEY_TESTS
- for proxy_key, fail in handler_tests[1]
+ for proxy_key, proxy_scheme, fail in handler_tests[2]
], indirect=['handler'])
- def test_proxy_key(self, handler, proxy_key, fail):
- run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
- run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
+ def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
+ run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
+ run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})
@pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
(handler_tests[0], handler_tests[1], scheme, fail)
@@ -1078,16 +1289,6 @@ class TestRequestHandlerValidation:
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
- @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
- def test_empty_proxy(self, handler):
- run_validation(handler, False, Request('http://', proxies={'http': None}))
- run_validation(handler, False, Request('http://'), proxies={'http': None})
-
- @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
- @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
- def test_invalid_proxy_url(self, handler, proxy_url):
- run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
-
@pytest.mark.parametrize('handler,scheme,extensions,fail', [
(handler_tests[0], handler_tests[1], extensions, fail)
for handler_tests in EXTENSION_TESTS
@@ -1113,6 +1314,10 @@ class FakeResponse(Response):
class FakeRH(RequestHandler):
+ def __init__(self, *args, **params):
+ self.params = params
+ super().__init__(*args, **params)
+
def _validate(self, request):
return
@@ -1271,15 +1476,10 @@ class TestYoutubeDLNetworking:
('', {'all': '__noproxy__'}),
(None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # env, set https
])
- def test_proxy(self, proxy, expected):
- old_http_proxy = os.environ.get('HTTP_PROXY')
- try:
- os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081' # ensure that provided proxies override env
- with FakeYDL({'proxy': proxy}) as ydl:
- assert ydl.proxies == expected
- finally:
- if old_http_proxy:
- os.environ['HTTP_PROXY'] = old_http_proxy
+ def test_proxy(self, proxy, expected, monkeypatch):
+ monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081')
+ with FakeYDL({'proxy': proxy}) as ydl:
+ assert ydl.proxies == expected
def test_compat_request(self):
with FakeRHYDL() as ydl:
@@ -1331,6 +1531,95 @@ class TestYoutubeDLNetworking:
with pytest.raises(SSLError, match='testerror'):
ydl.urlopen('ssl://testerror')
+ def test_unsupported_impersonate_target(self):
+ class FakeImpersonationRHYDL(FakeYDL):
+ def __init__(self, *args, **kwargs):
+ class HTTPRH(RequestHandler):
+ def _send(self, request: Request):
+ pass
+ _SUPPORTED_URL_SCHEMES = ('http',)
+ _SUPPORTED_PROXY_SCHEMES = None
+
+ super().__init__(*args, **kwargs)
+ self._request_director = self.build_request_director([HTTPRH])
+
+ with FakeImpersonationRHYDL() as ydl:
+ with pytest.raises(
+ RequestError,
+ match=r'Impersonate target "test" is not available'
+ ):
+ ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)}))
+
+ def test_unsupported_impersonate_extension(self):
+ class FakeHTTPRHYDL(FakeYDL):
+ def __init__(self, *args, **kwargs):
+ class IRH(ImpersonateRequestHandler):
+ def _send(self, request: Request):
+ pass
+
+ _SUPPORTED_URL_SCHEMES = ('http',)
+ _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc',): 'test'}
+ _SUPPORTED_PROXY_SCHEMES = None
+
+ super().__init__(*args, **kwargs)
+ self._request_director = self.build_request_director([IRH])
+
+ with FakeHTTPRHYDL() as ydl:
+ with pytest.raises(
+ RequestError,
+ match=r'Impersonate target "test" is not available'
+ ):
+ ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)}))
+
+ def test_raise_impersonate_error(self):
+ with pytest.raises(
+ YoutubeDLError,
+ match=r'Impersonate target "test" is not available'
+ ):
+ FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)})
+
+ def test_pass_impersonate_param(self, monkeypatch):
+
+ class IRH(ImpersonateRequestHandler):
+ def _send(self, request: Request):
+ pass
+
+ _SUPPORTED_URL_SCHEMES = ('http',)
+ _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}
+
+ # Bypass the check on initialize
+ brh = FakeYDL.build_request_director
+ monkeypatch.setattr(FakeYDL, 'build_request_director', lambda cls, handlers, preferences=None: brh(cls, handlers=[IRH]))
+
+ with FakeYDL({
+ 'impersonate': ImpersonateTarget('abc', None, None, None)
+ }) as ydl:
+ rh = self.build_handler(ydl, IRH)
+ assert rh.impersonate == ImpersonateTarget('abc', None, None, None)
+
+ def test_get_impersonate_targets(self):
+ handlers = []
+ for target_client in ('abc', 'xyz', 'asd'):
+ class TestRH(ImpersonateRequestHandler):
+ def _send(self, request: Request):
+ pass
+ _SUPPORTED_URL_SCHEMES = ('http',)
+ _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client,): 'test'}
+ RH_KEY = target_client
+ RH_NAME = target_client
+ handlers.append(TestRH)
+
+ with FakeYDL() as ydl:
+ ydl._request_director = ydl.build_request_director(handlers)
+ assert set(ydl._get_available_impersonate_targets()) == {
+ (ImpersonateTarget('xyz'), 'xyz'),
+ (ImpersonateTarget('abc'), 'abc'),
+ (ImpersonateTarget('asd'), 'asd')
+ }
+ assert ydl._impersonate_target_available(ImpersonateTarget('abc'))
+ assert ydl._impersonate_target_available(ImpersonateTarget())
+ assert not ydl._impersonate_target_available(ImpersonateTarget('zxy'))
+
@pytest.mark.parametrize('proxy_key,proxy_url,expected', [
('http', '__noproxy__', None),
('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
@@ -1341,23 +1630,17 @@ class TestYoutubeDLNetworking:
('http', 'socks4://example.com', 'socks4://example.com'),
('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies
])
- def test_clean_proxy(self, proxy_key, proxy_url, expected):
+ def test_clean_proxy(self, proxy_key, proxy_url, expected, monkeypatch):
# proxies should be cleaned in urlopen()
with FakeRHYDL() as ydl:
req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
assert req.proxies[proxy_key] == expected
# and should also be cleaned when building the handler
- env_key = f'{proxy_key.upper()}_PROXY'
- old_env_proxy = os.environ.get(env_key)
- try:
- os.environ[env_key] = proxy_url # ensure that provided proxies override env
- with FakeYDL() as ydl:
- rh = self.build_handler(ydl)
- assert rh.proxies[proxy_key] == expected
- finally:
- if old_env_proxy:
- os.environ[env_key] = old_env_proxy
+ monkeypatch.setenv(f'{proxy_key.upper()}_PROXY', proxy_url)
+ with FakeYDL() as ydl:
+ rh = self.build_handler(ydl)
+ assert rh.proxies[proxy_key] == expected
def test_clean_proxy_header(self):
with FakeRHYDL() as ydl:
@@ -1629,3 +1912,71 @@ class TestResponse:
assert res.geturl() == res.url
assert res.info() is res.headers
assert res.getheader('test') == res.get_header('test')
+
+
+class TestImpersonateTarget:
+ @pytest.mark.parametrize('target_str,expected', [
+ ('abc', ImpersonateTarget('abc', None, None, None)),
+ ('abc-120_esr', ImpersonateTarget('abc', '120_esr', None, None)),
+ ('abc-120:xyz', ImpersonateTarget('abc', '120', 'xyz', None)),
+ ('abc-120:xyz-5.6', ImpersonateTarget('abc', '120', 'xyz', '5.6')),
+ ('abc:xyz', ImpersonateTarget('abc', None, 'xyz', None)),
+ ('abc:', ImpersonateTarget('abc', None, None, None)),
+ ('abc-120:', ImpersonateTarget('abc', '120', None, None)),
+ (':xyz', ImpersonateTarget(None, None, 'xyz', None)),
+ (':xyz-6.5', ImpersonateTarget(None, None, 'xyz', '6.5')),
+ (':', ImpersonateTarget(None, None, None, None)),
+ ('', ImpersonateTarget(None, None, None, None)),
+ ])
+ def test_target_from_str(self, target_str, expected):
+ assert ImpersonateTarget.from_str(target_str) == expected
+
+ @pytest.mark.parametrize('target_str', [
+ '-120', ':-12.0', '-12:-12', '-:-',
+ '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:'
+ ])
+ def test_target_from_invalid_str(self, target_str):
+ with pytest.raises(ValueError):
+ ImpersonateTarget.from_str(target_str)
+
+ @pytest.mark.parametrize('target,expected', [
+ (ImpersonateTarget('abc', None, None, None), 'abc'),
+ (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
+ (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
+ (ImpersonateTarget('abc', '120', 'xyz', '5'), 'abc-120:xyz-5'),
+ (ImpersonateTarget('abc', None, 'xyz', None), 'abc:xyz'),
+ (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
+ (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
+ (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'),
+ (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'),
+ (ImpersonateTarget('abc', ), 'abc'),
+ (ImpersonateTarget(None, None, None, None), ''),
+ ])
+ def test_str(self, target, expected):
+ assert str(target) == expected
+
+ @pytest.mark.parametrize('args', [
+ ('abc', None, None, '5'),
+ ('abc', '120', None, '5'),
+ (None, '120', None, None),
+ (None, '120', None, '5'),
+ (None, None, None, '5'),
+ (None, '120', 'xyz', '5'),
+ ])
+ def test_invalid_impersonate_target(self, args):
+ with pytest.raises(ValueError):
+ ImpersonateTarget(*args)
+
+ @pytest.mark.parametrize('target1,target2,is_in,is_eq', [
+ (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', None, None, None), True, True),
+ (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', '120', None, None), True, False),
+ (ImpersonateTarget('abc', None, 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', None), True, False),
+ (ImpersonateTarget('abc', '121', 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', 'test'), False, False),
+ (ImpersonateTarget('abc'), ImpersonateTarget('abc', '120', 'xyz', 'test'), True, False),
+ (ImpersonateTarget('abc', '120', 'xyz', 'test'), ImpersonateTarget('abc'), True, False),
+ (ImpersonateTarget(), ImpersonateTarget('abc', '120', 'xyz'), True, False),
+ (ImpersonateTarget(), ImpersonateTarget(), True, True),
+ ])
+ def test_impersonate_target_in(self, target1, target2, is_in, is_eq):
+ assert (target1 in target2) is is_in
+ assert (target1 == target2) is is_eq
diff --git a/test/test_socks.py b/test/test_socks.py
index cb22b61..43d612d 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -286,8 +286,14 @@ def ctx(request):
return CTX_MAP[request.param]()
+@pytest.mark.parametrize(
+ 'handler,ctx', [
+ ('Urllib', 'http'),
+ ('Requests', 'http'),
+ ('Websockets', 'ws'),
+ ('CurlCFFI', 'http')
+ ], indirect=True)
class TestSocks4Proxy:
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4_no_auth(self, handler, ctx):
with handler() as rh:
with ctx.socks_server(Socks4ProxyHandler) as server_address:
@@ -295,7 +301,6 @@ class TestSocks4Proxy:
rh, proxies={'all': f'socks4://{server_address}'})
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4_auth(self, handler, ctx):
with handler() as rh:
with ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address:
@@ -305,7 +310,6 @@ class TestSocks4Proxy:
rh, proxies={'all': f'socks4://user:@{server_address}'})
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4a_ipv4_target(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
@@ -313,7 +317,6 @@ class TestSocks4Proxy:
assert response['version'] == 4
assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4a_domain_target(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
@@ -322,7 +325,6 @@ class TestSocks4Proxy:
assert response['ipv4_address'] is None
assert response['domain_address'] == 'localhost'
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
@@ -333,7 +335,6 @@ class TestSocks4Proxy:
assert response['client_address'][0] == source_address
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
@pytest.mark.parametrize('reply_code', [
Socks4CD.REQUEST_REJECTED_OR_FAILED,
Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD,
@@ -345,7 +346,6 @@ class TestSocks4Proxy:
with pytest.raises(ProxyError):
ctx.socks_info_request(rh)
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv6_socks4_proxy(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
@@ -354,7 +354,6 @@ class TestSocks4Proxy:
assert response['ipv4_address'] == '127.0.0.1'
assert response['version'] == 4
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_timeout(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
@@ -362,9 +361,15 @@ class TestSocks4Proxy:
ctx.socks_info_request(rh)
+@pytest.mark.parametrize(
+ 'handler,ctx', [
+ ('Urllib', 'http'),
+ ('Requests', 'http'),
+ ('Websockets', 'ws'),
+ ('CurlCFFI', 'http')
+ ], indirect=True)
class TestSocks5Proxy:
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_no_auth(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -372,7 +377,6 @@ class TestSocks5Proxy:
assert response['auth_methods'] == [0x0]
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_user_pass(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address:
with handler() as rh:
@@ -385,7 +389,6 @@ class TestSocks5Proxy:
assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS]
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_ipv4_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -393,7 +396,6 @@ class TestSocks5Proxy:
assert response['ipv4_address'] == '127.0.0.1'
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_domain_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -401,7 +403,6 @@ class TestSocks5Proxy:
assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5h_domain_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
@@ -410,7 +411,6 @@ class TestSocks5Proxy:
assert response['domain_address'] == 'localhost'
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5h_ip_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
@@ -419,7 +419,6 @@ class TestSocks5Proxy:
assert response['domain_address'] is None
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_ipv6_destination(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -427,7 +426,6 @@ class TestSocks5Proxy:
assert response['ipv6_address'] == '::1'
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv6_socks5_proxy(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@@ -438,7 +436,6 @@ class TestSocks5Proxy:
# XXX: is there any feasible way of testing IPv6 source addresses?
# Same would go for non-proxy source_address test...
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
@@ -448,7 +445,6 @@ class TestSocks5Proxy:
assert response['client_address'][0] == source_address
assert response['version'] == 5
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
@pytest.mark.parametrize('reply_code', [
Socks5Reply.GENERAL_FAILURE,
Socks5Reply.CONNECTION_NOT_ALLOWED,
@@ -465,7 +461,6 @@ class TestSocks5Proxy:
with pytest.raises(ProxyError):
ctx.socks_info_request(rh)
- @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Websockets', 'ws')], indirect=True)
def test_timeout(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, sleep=2) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}, timeout=1) as rh:
diff --git a/test/test_traversal.py b/test/test_traversal.py
new file mode 100644
index 0000000..9b2a27b
--- /dev/null
+++ b/test/test_traversal.py
@@ -0,0 +1,444 @@
+import http.cookies
+import re
+import xml.etree.ElementTree
+
+import pytest
+
+from yt_dlp.utils import dict_get, int_or_none, str_or_none
+from yt_dlp.utils.traversal import traverse_obj
+
+_TEST_DATA = {
+ 100: 100,
+ 1.2: 1.2,
+ 'str': 'str',
+ 'None': None,
+ '...': ...,
+ 'urls': [
+ {'index': 0, 'url': 'https://www.example.com/0'},
+ {'index': 1, 'url': 'https://www.example.com/1'},
+ ],
+ 'data': (
+ {'index': 2},
+ {'index': 3},
+ ),
+ 'dict': {},
+}
+
+
+class TestTraversal:
+ def test_traversal_base(self):
+ assert traverse_obj(_TEST_DATA, ('str',)) == 'str', \
+ 'allow tuple path'
+ assert traverse_obj(_TEST_DATA, ['str']) == 'str', \
+ 'allow list path'
+ assert traverse_obj(_TEST_DATA, (value for value in ("str",))) == 'str', \
+ 'allow iterable path'
+ assert traverse_obj(_TEST_DATA, 'str') == 'str', \
+ 'single items should be treated as a path'
+ assert traverse_obj(_TEST_DATA, 100) == 100, \
+ 'allow int path'
+ assert traverse_obj(_TEST_DATA, 1.2) == 1.2, \
+ 'allow float path'
+ assert traverse_obj(_TEST_DATA, None) == _TEST_DATA, \
+ '`None` should not perform any modification'
+
+ def test_traversal_ellipsis(self):
+ assert traverse_obj(_TEST_DATA, ...) == [x for x in _TEST_DATA.values() if x not in (None, {})], \
+ '`...` should give all non discarded values'
+ assert traverse_obj(_TEST_DATA, ('urls', 0, ...)) == list(_TEST_DATA['urls'][0].values()), \
+ '`...` selection for dicts should select all values'
+ assert traverse_obj(_TEST_DATA, (..., ..., 'url')) == ['https://www.example.com/0', 'https://www.example.com/1'], \
+ 'nested `...` queries should work'
+ assert traverse_obj(_TEST_DATA, (..., ..., 'index')) == list(range(4)), \
+ '`...` query result should be flattened'
+ assert traverse_obj(iter(range(4)), ...) == list(range(4)), \
+ '`...` should accept iterables'
+
+ def test_traversal_function(self):
+ filter_func = lambda x, y: x == 'urls' and isinstance(y, list)
+ assert traverse_obj(_TEST_DATA, filter_func) == [_TEST_DATA['urls']], \
+ 'function as query key should perform a filter based on (key, value)'
+ assert traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)) == ['str'], \
+ 'exceptions in the query function should be catched'
+ assert traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0) == [0, 2], \
+ 'function key should accept iterables'
+ # Wrong function signature should raise (debug mode)
+ with pytest.raises(Exception):
+ traverse_obj(_TEST_DATA, lambda a: ...)
+ with pytest.raises(Exception):
+ traverse_obj(_TEST_DATA, lambda a, b, c: ...)
+
+ def test_traversal_set(self):
+ # transformation/type, like `expected_type`
+ assert traverse_obj(_TEST_DATA, (..., {str.upper}, )) == ['STR'], \
+ 'Function in set should be a transformation'
+ assert traverse_obj(_TEST_DATA, (..., {str})) == ['str'], \
+ 'Type in set should be a type filter'
+ assert traverse_obj(_TEST_DATA, (..., {str, int})) == [100, 'str'], \
+ 'Multiple types in set should be a type filter'
+ assert traverse_obj(_TEST_DATA, {dict}) == _TEST_DATA, \
+ 'A single set should be wrapped into a path'
+ assert traverse_obj(_TEST_DATA, (..., {str.upper})) == ['STR'], \
+ 'Transformation function should not raise'
+ expected = [x for x in map(str_or_none, _TEST_DATA.values()) if x is not None]
+ assert traverse_obj(_TEST_DATA, (..., {str_or_none})) == expected, \
+ 'Function in set should be a transformation'
+ assert traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})) == 'const', \
+ 'Function in set should always be called'
+ # Sets with length < 1 or > 1 not including only types should raise
+ with pytest.raises(Exception):
+ traverse_obj(_TEST_DATA, set())
+ with pytest.raises(Exception):
+ traverse_obj(_TEST_DATA, {str.upper, str})
+
+ def test_traversal_slice(self):
+ _SLICE_DATA = [0, 1, 2, 3, 4]
+
+ assert traverse_obj(_TEST_DATA, ('dict', slice(1))) is None, \
+ 'slice on a dictionary should not throw'
+ assert traverse_obj(_SLICE_DATA, slice(1)) == _SLICE_DATA[:1], \
+ 'slice key should apply slice to sequence'
+ assert traverse_obj(_SLICE_DATA, slice(1, 2)) == _SLICE_DATA[1:2], \
+ 'slice key should apply slice to sequence'
+ assert traverse_obj(_SLICE_DATA, slice(1, 4, 2)) == _SLICE_DATA[1:4:2], \
+ 'slice key should apply slice to sequence'
+
+ def test_traversal_alternatives(self):
+ assert traverse_obj(_TEST_DATA, 'fail', 'str') == 'str', \
+ 'multiple `paths` should be treated as alternative paths'
+ assert traverse_obj(_TEST_DATA, 'str', 100) == 'str', \
+ 'alternatives should exit early'
+ assert traverse_obj(_TEST_DATA, 'fail', 'fail') is None, \
+ 'alternatives should return `default` if exhausted'
+ assert traverse_obj(_TEST_DATA, (..., 'fail'), 100) == 100, \
+ 'alternatives should track their own branching return'
+ assert traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)) == list(_TEST_DATA['data']), \
+ 'alternatives on empty objects should search further'
+
+ def test_traversal_branching_nesting(self):
+ assert traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')) == ['https://www.example.com/0'], \
+ 'tuple as key should be treated as branches'
+ assert traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')) == ['https://www.example.com/0'], \
+ 'list as key should be treated as branches'
+ assert traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))) == ['https://www.example.com/0'], \
+ 'double nesting in path should be treated as paths'
+ assert traverse_obj(['0', [1, 2]], [(0, 1), 0]) == [1], \
+ 'do not fail early on branching'
+ expected = ['https://www.example.com/0', 'https://www.example.com/1']
+ assert traverse_obj(_TEST_DATA, ('urls', ((0, ('fail', 'url')), (1, 'url')))) == expected, \
+ 'tripple nesting in path should be treated as branches'
+ assert traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))) == expected, \
+ 'ellipsis as branch path start gets flattened'
+
+ def test_traversal_dict(self):
+ assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}) == {0: 100, 1: 1.2}, \
+ 'dict key should result in a dict with the same keys'
+ expected = {0: 'https://www.example.com/0'}
+ assert traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}) == expected, \
+ 'dict key should allow paths'
+ expected = {0: ['https://www.example.com/0']}
+ assert traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}) == expected, \
+ 'tuple in dict path should be treated as branches'
+ assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}) == expected, \
+ 'double nesting in dict path should be treated as paths'
+ expected = {0: ['https://www.example.com/1', 'https://www.example.com/0']}
+ assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}) == expected, \
+ 'tripple nesting in dict path should be treated as branches'
+ assert traverse_obj(_TEST_DATA, {0: 'fail'}) == {}, \
+ 'remove `None` values when top level dict key fails'
+ assert traverse_obj(_TEST_DATA, {0: 'fail'}, default=...) == {0: ...}, \
+ 'use `default` if key fails and `default`'
+ assert traverse_obj(_TEST_DATA, {0: 'dict'}) == {}, \
+ 'remove empty values when dict key'
+ assert traverse_obj(_TEST_DATA, {0: 'dict'}, default=...) == {0: ...}, \
+ 'use `default` when dict key and `default`'
+ assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}) == {}, \
+ 'remove empty values when nested dict key fails'
+ assert traverse_obj(None, {0: 'fail'}) == {}, \
+ 'default to dict if pruned'
+ assert traverse_obj(None, {0: 'fail'}, default=...) == {0: ...}, \
+ 'default to dict if pruned and default is given'
+ assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...) == {0: {0: ...}}, \
+ 'use nested `default` when nested dict key fails and `default`'
+ assert traverse_obj(_TEST_DATA, {0: ('dict', ...)}) == {}, \
+ 'remove key if branch in dict key not successful'
+
+ def test_traversal_default(self):
+ _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
+
+ assert traverse_obj(_DEFAULT_DATA, 'fail') is None, \
+ 'default value should be `None`'
+ assert traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...) == ..., \
+ 'chained fails should result in default'
+ assert traverse_obj(_DEFAULT_DATA, 'None', 'int') == 0, \
+ 'should not short cirquit on `None`'
+ assert traverse_obj(_DEFAULT_DATA, 'fail', default=1) == 1, \
+ 'invalid dict key should result in `default`'
+ assert traverse_obj(_DEFAULT_DATA, 'None', default=1) == 1, \
+ '`None` is a deliberate sentinel and should become `default`'
+ assert traverse_obj(_DEFAULT_DATA, ('list', 10)) is None, \
+ '`IndexError` should result in `default`'
+ assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1) == 1, \
+ 'if branched but not successful return `default` if defined, not `[]`'
+ assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None) is None, \
+ 'if branched but not successful return `default` even if `default` is `None`'
+ assert traverse_obj(_DEFAULT_DATA, (..., 'fail')) == [], \
+ 'if branched but not successful return `[]`, not `default`'
+ assert traverse_obj(_DEFAULT_DATA, ('list', ...)) == [], \
+ 'if branched but object is empty return `[]`, not `default`'
+ assert traverse_obj(None, ...) == [], \
+ 'if branched but object is `None` return `[]`, not `default`'
+ assert traverse_obj({0: None}, (0, ...)) == [], \
+ 'if branched but state is `None` return `[]`, not `default`'
+
+ @pytest.mark.parametrize('path', [
+ ('fail', ...),
+ (..., 'fail'),
+ 100 * ('fail',) + (...,),
+ (...,) + 100 * ('fail',),
+ ])
+ def test_traversal_branching(self, path):
+ assert traverse_obj({}, path) == [], \
+ 'if branched but state is `None`, return `[]` (not `default`)'
+ assert traverse_obj({}, 'fail', path) == [], \
+ 'if branching in last alternative and previous did not match, return `[]` (not `default`)'
+ assert traverse_obj({0: 'x'}, 0, path) == 'x', \
+ 'if branching in last alternative and previous did match, return single value'
+ assert traverse_obj({0: 'x'}, path, 0) == 'x', \
+ 'if branching in first alternative and non-branching path does match, return single value'
+ assert traverse_obj({}, path, 'fail') is None, \
+ 'if branching in first alternative and non-branching path does not match, return `default`'
+
+ def test_traversal_expected_type(self):
+ _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
+
+ assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str) == 'str', \
+ 'accept matching `expected_type` type'
+ assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int) is None, \
+ 'reject non matching `expected_type` type'
+ assert traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)) == '0', \
+ 'transform type using type function'
+ assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0) is None, \
+ 'wrap expected_type fuction in try_call'
+ assert traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str) == ['str'], \
+ 'eliminate items that expected_type fails on'
+ assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int) == {0: 100}, \
+ 'type as expected_type should filter dict values'
+ assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none) == {0: '100', 1: '1.2'}, \
+ 'function as expected_type should transform dict values'
+ assert traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int) == 1, \
+ 'expected_type should not filter non final dict values'
+ assert traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int) == {0: {0: 100}}, \
+ 'expected_type should transform deep dict values'
+ assert traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)) == [{0: ...}, {0: ...}], \
+ 'expected_type should transform branched dict values'
+ assert traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int) == [4], \
+ 'expected_type regression for type matching in tuple branching'
+ assert traverse_obj(_TEST_DATA, ['data', ...], expected_type=int) == [], \
+ 'expected_type regression for type matching in dict result'
+
+ def test_traversal_get_all(self):
+ _GET_ALL_DATA = {'key': [0, 1, 2]}
+
+ assert traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False) == 0, \
+ 'if not `get_all`, return only first matching value'
+ assert traverse_obj(_GET_ALL_DATA, ..., get_all=False) == [0, 1, 2], \
+ 'do not overflatten if not `get_all`'
+
+ def test_traversal_casesense(self):
+ _CASESENSE_DATA = {
+ 'KeY': 'value0',
+ 0: {
+ 'KeY': 'value1',
+ 0: {'KeY': 'value2'},
+ },
+ }
+
+ assert traverse_obj(_CASESENSE_DATA, 'key') is None, \
+ 'dict keys should be case sensitive unless `casesense`'
+ assert traverse_obj(_CASESENSE_DATA, 'keY', casesense=False) == 'value0', \
+ 'allow non matching key case if `casesense`'
+ assert traverse_obj(_CASESENSE_DATA, [0, ('keY',)], casesense=False) == ['value1'], \
+ 'allow non matching key case in branch if `casesense`'
+ assert traverse_obj(_CASESENSE_DATA, [0, ([0, 'keY'],)], casesense=False) == ['value2'], \
+ 'allow non matching key case in branch path if `casesense`'
+
+ def test_traversal_traverse_string(self):
+ _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
+
+ assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)) is None, \
+ 'do not traverse into string if not `traverse_string`'
+ assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), traverse_string=True) == 's', \
+ 'traverse into string if `traverse_string`'
+ assert traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), traverse_string=True) == '.', \
+ 'traverse into converted data if `traverse_string`'
+ assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...), traverse_string=True) == 'str', \
+ '`...` should result in string (same value) if `traverse_string`'
+ assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), traverse_string=True) == 'sr', \
+ '`slice` should result in string if `traverse_string`'
+ assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), traverse_string=True) == 'str', \
+ 'function should result in string if `traverse_string`'
+ assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), traverse_string=True) == ['s', 'r'], \
+ 'branching should result in list if `traverse_string`'
+ assert traverse_obj({}, (0, ...), traverse_string=True) == [], \
+ 'branching should result in list if `traverse_string`'
+ assert traverse_obj({}, (0, lambda x, y: True), traverse_string=True) == [], \
+ 'branching should result in list if `traverse_string`'
+ assert traverse_obj({}, (0, slice(1)), traverse_string=True) == [], \
+ 'branching should result in list if `traverse_string`'
+
+ def test_traversal_re(self):
+ mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
+ assert traverse_obj(mobj, ...) == [x for x in mobj.groups() if x is not None], \
+ '`...` on a `re.Match` should give its `groups()`'
+ assert traverse_obj(mobj, lambda k, _: k in (0, 2)) == ['0123', '3'], \
+ 'function on a `re.Match` should give groupno, value starting at 0'
+ assert traverse_obj(mobj, 'group') == '3', \
+ 'str key on a `re.Match` should give group with that name'
+ assert traverse_obj(mobj, 2) == '3', \
+ 'int key on a `re.Match` should give group with that name'
+ assert traverse_obj(mobj, 'gRoUp', casesense=False) == '3', \
+ 'str key on a `re.Match` should respect casesense'
+ assert traverse_obj(mobj, 'fail') is None, \
+ 'failing str key on a `re.Match` should return `default`'
+ assert traverse_obj(mobj, 'gRoUpS', casesense=False) is None, \
+ 'failing str key on a `re.Match` should return `default`'
+ assert traverse_obj(mobj, 8) is None, \
+ 'failing int key on a `re.Match` should return `default`'
+ assert traverse_obj(mobj, lambda k, _: k in (0, 'group')) == ['0123', '3'], \
+ 'function on a `re.Match` should give group name as well'
+
+ def test_traversal_xml_etree(self):
+ etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
+ <data>
+ <country name="Liechtenstein">
+ <rank>1</rank>
+ <year>2008</year>
+ <gdppc>141100</gdppc>
+ <neighbor name="Austria" direction="E"/>
+ <neighbor name="Switzerland" direction="W"/>
+ </country>
+ <country name="Singapore">
+ <rank>4</rank>
+ <year>2011</year>
+ <gdppc>59900</gdppc>
+ <neighbor name="Malaysia" direction="N"/>
+ </country>
+ <country name="Panama">
+ <rank>68</rank>
+ <year>2011</year>
+ <gdppc>13600</gdppc>
+ <neighbor name="Costa Rica" direction="W"/>
+ <neighbor name="Colombia" direction="E"/>
+ </country>
+ </data>''')
+ assert traverse_obj(etree, '') == etree, \
+ 'empty str key should return the element itself'
+ assert traverse_obj(etree, 'country') == list(etree), \
+ 'str key should lead all children with that tag name'
+ assert traverse_obj(etree, ...) == list(etree), \
+ '`...` as key should return all children'
+ assert traverse_obj(etree, lambda _, x: x[0].text == '4') == [etree[1]], \
+ 'function as key should get element as value'
+ assert traverse_obj(etree, lambda i, _: i == 1) == [etree[1]], \
+ 'function as key should get index as key'
+ assert traverse_obj(etree, 0) == etree[0], \
+ 'int key should return the nth child'
+ expected = ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia']
+ assert traverse_obj(etree, './/neighbor/@name') == expected, \
+ '`@<attribute>` at end of path should give that attribute'
+ assert traverse_obj(etree, '//neighbor/@fail') == [None, None, None, None, None], \
+ '`@<nonexistant>` at end of path should give `None`'
+ assert traverse_obj(etree, ('//neighbor/@', 2)) == {'name': 'Malaysia', 'direction': 'N'}, \
+ '`@` should give the full attribute dict'
+ assert traverse_obj(etree, '//year/text()') == ['2008', '2011', '2011'], \
+ '`text()` at end of path should give the inner text'
+ assert traverse_obj(etree, '//*[@direction]/@direction') == ['E', 'W', 'N', 'W', 'E'], \
+ 'full Python xpath features should be supported'
+ assert traverse_obj(etree, (0, '@name')) == 'Liechtenstein', \
+ 'special transformations should act on current element'
+ assert traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})) == [1, 2008, 141100], \
+ 'special transformations should act on current element'
+
+ def test_traversal_unbranching(self):
+ assert traverse_obj(_TEST_DATA, [(100, 1.2), all]) == [100, 1.2], \
+ '`all` should give all results as list'
+ assert traverse_obj(_TEST_DATA, [(100, 1.2), any]) == 100, \
+ '`any` should give the first result'
+ assert traverse_obj(_TEST_DATA, [100, all]) == [100], \
+ '`all` should give list if non branching'
+ assert traverse_obj(_TEST_DATA, [100, any]) == 100, \
+ '`any` should give single item if non branching'
+ assert traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]) == [100], \
+ '`all` should filter `None` and empty dict'
+ assert traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]) == 100, \
+ '`any` should filter `None` and empty dict'
+ assert traverse_obj(_TEST_DATA, [{
+ 'all': [('dict', 'None', 100, 1.2), all],
+ 'any': [('dict', 'None', 100, 1.2), any],
+ }]) == {'all': [100, 1.2], 'any': 100}, \
+ '`all`/`any` should apply to each dict path separately'
+ assert traverse_obj(_TEST_DATA, [{
+ 'all': [('dict', 'None', 100, 1.2), all],
+ 'any': [('dict', 'None', 100, 1.2), any],
+ }], get_all=False) == {'all': [100, 1.2], 'any': 100}, \
+ '`all`/`any` should apply to dict regardless of `get_all`'
+ assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, {float}]) is None, \
+ '`all` should reset branching status'
+ assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, {float}]) is None, \
+ '`any` should reset branching status'
+ assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, ..., {float}]) == [1.2], \
+ '`all` should allow further branching'
+ assert traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, ..., 'index']) == [0, 1], \
+ '`any` should allow further branching'
+
+ def test_traversal_morsel(self):
+ values = {
+ 'expires': 'a',
+ 'path': 'b',
+ 'comment': 'c',
+ 'domain': 'd',
+ 'max-age': 'e',
+ 'secure': 'f',
+ 'httponly': 'g',
+ 'version': 'h',
+ 'samesite': 'i',
+ }
+ morsel = http.cookies.Morsel()
+ morsel.set('item_key', 'item_value', 'coded_value')
+ morsel.update(values)
+ values['key'] = 'item_key'
+ values['value'] = 'item_value'
+
+ for key, value in values.items():
+ assert traverse_obj(morsel, key) == value, \
+ 'Morsel should provide access to all values'
+ assert traverse_obj(morsel, ...) == list(values.values()), \
+ '`...` should yield all values'
+ assert traverse_obj(morsel, lambda k, v: True) == list(values.values()), \
+ 'function key should yield all values'
+ assert traverse_obj(morsel, [(None,), any]) == morsel, \
+ 'Morsel should not be implicitly changed to dict on usage'
+
+
+class TestDictGet:
+ def test_dict_get(self):
+ FALSE_VALUES = {
+ 'none': None,
+ 'false': False,
+ 'zero': 0,
+ 'empty_string': '',
+ 'empty_list': [],
+ }
+ d = {**FALSE_VALUES, 'a': 42}
+ assert dict_get(d, 'a') == 42
+ assert dict_get(d, 'b') is None
+ assert dict_get(d, 'b', 42) == 42
+ assert dict_get(d, ('a',)) == 42
+ assert dict_get(d, ('b', 'a')) == 42
+ assert dict_get(d, ('b', 'c', 'a', 'd')) == 42
+ assert dict_get(d, ('b', 'c')) is None
+ assert dict_get(d, ('b', 'c'), 42) == 42
+ for key, false_value in FALSE_VALUES.items():
+ assert dict_get(d, ('b', 'c', key)) is None
+ assert dict_get(d, ('b', 'c', key), skip_false_values=False) == false_value
diff --git a/test/test_utils.py b/test/test_utils.py
index a3073f0..77fadbb 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -2,10 +2,10 @@
# Allow direct execution
import os
-import re
import sys
import unittest
import warnings
+import datetime as dt
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -28,6 +28,7 @@ from yt_dlp.utils import (
ExtractorError,
InAdvancePagedList,
LazyList,
+ NO_DEFAULT,
OnDemandPagedList,
Popen,
age_restricted,
@@ -45,7 +46,6 @@ from yt_dlp.utils import (
determine_ext,
determine_file_encoding,
dfxp2srt,
- dict_get,
encode_base_n,
encode_compat_str,
encodeFilename,
@@ -106,13 +106,11 @@ from yt_dlp.utils import (
sanitize_url,
shell_quote,
smuggle_url,
- str_or_none,
str_to_int,
strip_jsonp,
strip_or_none,
subtitles_filename,
timeconvert,
- traverse_obj,
try_call,
unescapeHTML,
unified_strdate,
@@ -755,28 +753,6 @@ class TestUtil(unittest.TestCase):
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
- def test_dict_get(self):
- FALSE_VALUES = {
- 'none': None,
- 'false': False,
- 'zero': 0,
- 'empty_string': '',
- 'empty_list': [],
- }
- d = FALSE_VALUES.copy()
- d['a'] = 42
- self.assertEqual(dict_get(d, 'a'), 42)
- self.assertEqual(dict_get(d, 'b'), None)
- self.assertEqual(dict_get(d, 'b', 42), 42)
- self.assertEqual(dict_get(d, ('a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'a', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
- self.assertEqual(dict_get(d, ('b', 'c', )), None)
- self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
- for key, false_value in FALSE_VALUES.items():
- self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
- self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
-
def test_merge_dicts(self):
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
@@ -794,6 +770,11 @@ class TestUtil(unittest.TestCase):
def test_parse_iso8601(self):
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066)
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None)
+ # default does not override timezone in date_str
+ self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
@@ -2039,359 +2020,6 @@ Line 1
warnings.simplefilter('ignore')
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
- def test_traverse_obj(self):
- _TEST_DATA = {
- 100: 100,
- 1.2: 1.2,
- 'str': 'str',
- 'None': None,
- '...': ...,
- 'urls': [
- {'index': 0, 'url': 'https://www.example.com/0'},
- {'index': 1, 'url': 'https://www.example.com/1'},
- ],
- 'data': (
- {'index': 2},
- {'index': 3},
- ),
- 'dict': {},
- }
-
- # Test base functionality
- self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
- msg='allow tuple path')
- self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
- msg='allow list path')
- self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
- msg='allow iterable path')
- self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
- msg='single items should be treated as a path')
- self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
- self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
- self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
-
- # Test Ellipsis behavior
- self.assertCountEqual(traverse_obj(_TEST_DATA, ...),
- (item for item in _TEST_DATA.values() if item not in (None, {})),
- msg='`...` should give all non discarded values')
- self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(),
- msg='`...` selection for dicts should select all values')
- self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')),
- ['https://www.example.com/0', 'https://www.example.com/1'],
- msg='nested `...` queries should work')
- self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
- msg='`...` query result should be flattened')
- self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
- msg='`...` should accept iterables')
-
- # Test function as key
- self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
- [_TEST_DATA['urls']],
- msg='function as query key should perform a filter based on (key, value)')
- self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
- msg='exceptions in the query function should be catched')
- self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
- msg='function key should accept iterables')
- if __debug__:
- with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
- traverse_obj(_TEST_DATA, lambda a: ...)
- with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
- traverse_obj(_TEST_DATA, lambda a, b, c: ...)
-
- # Test set as key (transformation/type, like `expected_type`)
- self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper}, )), ['STR'],
- msg='Function in set should be a transformation')
- self.assertEqual(traverse_obj(_TEST_DATA, (..., {str})), ['str'],
- msg='Type in set should be a type filter')
- self.assertEqual(traverse_obj(_TEST_DATA, {dict}), _TEST_DATA,
- msg='A single set should be wrapped into a path')
- self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper})), ['STR'],
- msg='Transformation function should not raise')
- self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})),
- [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
- msg='Function in set should be a transformation')
- self.assertEqual(traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})), 'const',
- msg='Function in set should always be called')
- if __debug__:
- with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
- traverse_obj(_TEST_DATA, set())
- with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
- traverse_obj(_TEST_DATA, {str.upper, str})
-
- # Test `slice` as a key
- _SLICE_DATA = [0, 1, 2, 3, 4]
- self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
- msg='slice on a dictionary should not throw')
- self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
- msg='slice key should apply slice to sequence')
- self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
- msg='slice key should apply slice to sequence')
- self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
- msg='slice key should apply slice to sequence')
-
- # Test alternative paths
- self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
- msg='multiple `paths` should be treated as alternative paths')
- self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
- msg='alternatives should exit early')
- self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
- msg='alternatives should return `default` if exhausted')
- self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100,
- msg='alternatives should track their own branching return')
- self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']),
- msg='alternatives on empty objects should search further')
-
- # Test branch and path nesting
- self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
- msg='tuple as key should be treated as branches')
- self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
- msg='list as key should be treated as branches')
- self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
- msg='double nesting in path should be treated as paths')
- self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
- msg='do not fail early on branching')
- self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
- ['https://www.example.com/0', 'https://www.example.com/1'],
- msg='tripple nesting in path should be treated as branches')
- self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))),
- ['https://www.example.com/0', 'https://www.example.com/1'],
- msg='ellipsis as branch path start gets flattened')
-
- # Test dictionary as key
- self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
- msg='dict key should result in a dict with the same keys')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
- {0: 'https://www.example.com/0'},
- msg='dict key should allow paths')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
- {0: ['https://www.example.com/0']},
- msg='tuple in dict path should be treated as branches')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
- {0: ['https://www.example.com/0']},
- msg='double nesting in dict path should be treated as paths')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
- {0: ['https://www.example.com/1', 'https://www.example.com/0']},
- msg='tripple nesting in dict path should be treated as branches')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
- msg='remove `None` values when top level dict key fails')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...},
- msg='use `default` if key fails and `default`')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
- msg='remove empty values when dict key')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: ...},
- msg='use `default` when dict key and `default`')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
- msg='remove empty values when nested dict key fails')
- self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
- msg='default to dict if pruned')
- self.assertEqual(traverse_obj(None, {0: 'fail'}, default=...), {0: ...},
- msg='default to dict if pruned and default is given')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...), {0: {0: ...}},
- msg='use nested `default` when nested dict key fails and `default`')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {},
- msg='remove key if branch in dict key not successful')
-
- # Testing default parameter behavior
- _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
- self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
- msg='default value should be `None`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ...,
- msg='chained fails should result in default')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
- msg='should not short cirquit on `None`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
- msg='invalid dict key should result in `default`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
- msg='`None` is a deliberate sentinel and should become `default`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
- msg='`IndexError` should result in `default`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
- msg='if branched but not successful return `default` if defined, not `[]`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None,
- msg='if branched but not successful return `default` even if `default` is `None`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [],
- msg='if branched but not successful return `[]`, not `default`')
- self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [],
- msg='if branched but object is empty return `[]`, not `default`')
- self.assertEqual(traverse_obj(None, ...), [],
- msg='if branched but object is `None` return `[]`, not `default`')
- self.assertEqual(traverse_obj({0: None}, (0, ...)), [],
- msg='if branched but state is `None` return `[]`, not `default`')
-
- branching_paths = [
- ('fail', ...),
- (..., 'fail'),
- 100 * ('fail',) + (...,),
- (...,) + 100 * ('fail',),
- ]
- for branching_path in branching_paths:
- self.assertEqual(traverse_obj({}, branching_path), [],
- msg='if branched but state is `None`, return `[]` (not `default`)')
- self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
- msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
- self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
- msg='if branching in last alternative and previous did match, return single value')
- self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
- msg='if branching in first alternative and non-branching path does match, return single value')
- self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
- msg='if branching in first alternative and non-branching path does not match, return `default`')
-
- # Testing expected_type behavior
- _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
- self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
- 'str', msg='accept matching `expected_type` type')
- self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
- None, msg='reject non matching `expected_type` type')
- self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
- '0', msg='transform type using type function')
- self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
- None, msg='wrap expected_type fuction in try_call')
- self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str),
- ['str'], msg='eliminate items that expected_type fails on')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
- {0: 100}, msg='type as expected_type should filter dict values')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
- {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
- self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
- 1, msg='expected_type should not filter non final dict values')
- self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
- {0: {0: 100}}, msg='expected_type should transform deep dict values')
- self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)),
- [{0: ...}, {0: ...}], msg='expected_type should transform branched dict values')
- self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
- [4], msg='expected_type regression for type matching in tuple branching')
- self.assertEqual(traverse_obj(_TEST_DATA, ['data', ...], expected_type=int),
- [], msg='expected_type regression for type matching in dict result')
-
- # Test get_all behavior
- _GET_ALL_DATA = {'key': [0, 1, 2]}
- self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0,
- msg='if not `get_all`, return only first matching value')
- self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2],
- msg='do not overflatten if not `get_all`')
-
- # Test casesense behavior
- _CASESENSE_DATA = {
- 'KeY': 'value0',
- 0: {
- 'KeY': 'value1',
- 0: {'KeY': 'value2'},
- },
- }
- self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
- msg='dict keys should be case sensitive unless `casesense`')
- self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
- casesense=False), 'value0',
- msg='allow non matching key case if `casesense`')
- self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
- casesense=False), ['value1'],
- msg='allow non matching key case in branch if `casesense`')
- self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
- casesense=False), ['value2'],
- msg='allow non matching key case in branch path if `casesense`')
-
- # Test traverse_string behavior
- _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
- self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
- msg='do not traverse into string if not `traverse_string`')
- self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
- traverse_string=True), 's',
- msg='traverse into string if `traverse_string`')
- self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
- traverse_string=True), '.',
- msg='traverse into converted data if `traverse_string`')
- self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...),
- traverse_string=True), 'str',
- msg='`...` should result in string (same value) if `traverse_string`')
- self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
- traverse_string=True), 'sr',
- msg='`slice` should result in string if `traverse_string`')
- self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
- traverse_string=True), 'str',
- msg='function should result in string if `traverse_string`')
- self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
- traverse_string=True), ['s', 'r'],
- msg='branching should result in list if `traverse_string`')
- self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [],
- msg='branching should result in list if `traverse_string`')
- self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [],
- msg='branching should result in list if `traverse_string`')
- self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
- msg='branching should result in list if `traverse_string`')
-
- # Test re.Match as input obj
- mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
- self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
- msg='`...` on a `re.Match` should give its `groups()`')
- self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
- msg='function on a `re.Match` should give groupno, value starting at 0')
- self.assertEqual(traverse_obj(mobj, 'group'), '3',
- msg='str key on a `re.Match` should give group with that name')
- self.assertEqual(traverse_obj(mobj, 2), '3',
- msg='int key on a `re.Match` should give group with that name')
- self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
- msg='str key on a `re.Match` should respect casesense')
- self.assertEqual(traverse_obj(mobj, 'fail'), None,
- msg='failing str key on a `re.Match` should return `default`')
- self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
- msg='failing str key on a `re.Match` should return `default`')
- self.assertEqual(traverse_obj(mobj, 8), None,
- msg='failing int key on a `re.Match` should return `default`')
- self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
- msg='function on a `re.Match` should give group name as well')
-
- # Test xml.etree.ElementTree.Element as input obj
- etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
- <data>
- <country name="Liechtenstein">
- <rank>1</rank>
- <year>2008</year>
- <gdppc>141100</gdppc>
- <neighbor name="Austria" direction="E"/>
- <neighbor name="Switzerland" direction="W"/>
- </country>
- <country name="Singapore">
- <rank>4</rank>
- <year>2011</year>
- <gdppc>59900</gdppc>
- <neighbor name="Malaysia" direction="N"/>
- </country>
- <country name="Panama">
- <rank>68</rank>
- <year>2011</year>
- <gdppc>13600</gdppc>
- <neighbor name="Costa Rica" direction="W"/>
- <neighbor name="Colombia" direction="E"/>
- </country>
- </data>''')
- self.assertEqual(traverse_obj(etree, ''), etree,
- msg='empty str key should return the element itself')
- self.assertEqual(traverse_obj(etree, 'country'), list(etree),
- msg='str key should lead all children with that tag name')
- self.assertEqual(traverse_obj(etree, ...), list(etree),
- msg='`...` as key should return all children')
- self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
- msg='function as key should get element as value')
- self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
- msg='function as key should get index as key')
- self.assertEqual(traverse_obj(etree, 0), etree[0],
- msg='int key should return the nth child')
- self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
- ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
- msg='`@<attribute>` at end of path should give that attribute')
- self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
- msg='`@<nonexistant>` at end of path should give `None`')
- self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
- msg='`@` should give the full attribute dict')
- self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
- msg='`text()` at end of path should give the inner text')
- self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
- msg='full Python xpath features should be supported')
- self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
- msg='special transformations should act on current element')
- self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100],
- msg='special transformations should act on current element')
-
def test_http_header_dict(self):
headers = HTTPHeaderDict()
headers['ytdl-test'] = b'0'
@@ -2438,7 +2066,22 @@ Line 1
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
- def test_Popen_windows_escaping(self):
+ def test_windows_escaping(self):
+ tests = [
+ 'test"&',
+ '%CMDCMDLINE:~-1%&',
+ 'a\nb',
+ '"',
+ '\\',
+ '!',
+ '^!',
+ 'a \\ b',
+ 'a \\" b',
+ 'a \\ b\\',
+ # We replace \r with \n
+ ('a\r\ra', 'a\n\na'),
+ ]
+
def run_shell(args):
stdout, stderr, error = Popen.run(
args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -2446,11 +2089,15 @@ Line 1
assert not error
return stdout
- # Test escaping
- assert run_shell(['echo', 'test"&']) == '"test""&"\n'
- # Test if delayed expansion is disabled
- assert run_shell(['echo', '^!']) == '"^!"\n'
- assert run_shell('echo "^!"') == '"^!"\n'
+ for argument in tests:
+ if isinstance(argument, str):
+ expected = argument
+ else:
+ argument, expected = argument
+
+ args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+ assert run_shell(args) == expected
+ assert run_shell(shell_quote(args, shell=True)) == expected
if __name__ == '__main__':
diff --git a/test/test_websockets.py b/test/test_websockets.py
index 13b3a1e..aa0dfa2 100644
--- a/test/test_websockets.py
+++ b/test/test_websockets.py
@@ -3,10 +3,12 @@
# Allow direct execution
import os
import sys
+import time
import pytest
from test.helper import verify_address_availability
+from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -18,7 +20,7 @@ import random
import ssl
import threading
-from yt_dlp import socks
+from yt_dlp import socks, traverse_obj
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import websockets
from yt_dlp.networking import Request
@@ -32,8 +34,6 @@ from yt_dlp.networking.exceptions import (
)
from yt_dlp.utils.networking import HTTPHeaderDict
-from test.conftest import validate_and_send
-
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -66,7 +66,9 @@ def process_request(self, request):
def create_websocket_server(**ws_kwargs):
import websockets.sync.server
- wsd = websockets.sync.server.serve(websocket_handler, '127.0.0.1', 0, process_request=process_request, **ws_kwargs)
+ wsd = websockets.sync.server.serve(
+ websocket_handler, '127.0.0.1', 0,
+ process_request=process_request, open_timeout=2, **ws_kwargs)
ws_port = wsd.socket.getsockname()[1]
ws_server_thread = threading.Thread(target=wsd.serve_forever)
ws_server_thread.daemon = True
@@ -100,7 +102,21 @@ def create_mtls_wss_websocket_server():
return create_websocket_server(ssl_context=sslctx)
+def ws_validate_and_send(rh, req):
+ rh.validate(req)
+ max_tries = 3
+ for i in range(max_tries):
+ try:
+ return rh.send(req)
+ except TransportError as e:
+ if i < (max_tries - 1) and 'connection closed during handshake' in str(e):
+ # websockets server sometimes hangs on new connections
+ continue
+ raise
+
+
@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
+@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
class TestWebsSocketRequestHandlerConformance:
@classmethod
def setup_class(cls):
@@ -116,10 +132,9 @@ class TestWebsSocketRequestHandlerConformance:
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_basic_websockets(self, handler):
with handler() as rh:
- ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url))
assert 'upgrade' in ws.headers
assert ws.status == 101
ws.send('foo')
@@ -128,33 +143,29 @@ class TestWebsSocketRequestHandlerConformance:
# https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
@pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_send_types(self, handler, msg, opcode):
with handler() as rh:
- ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url))
ws.send(msg)
assert int(ws.recv()) == opcode
ws.close()
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_verify_cert(self, handler):
with handler() as rh:
with pytest.raises(CertificateVerifyError):
- validate_and_send(rh, Request(self.wss_base_url))
+ ws_validate_and_send(rh, Request(self.wss_base_url))
with handler(verify=False) as rh:
- ws = validate_and_send(rh, Request(self.wss_base_url))
+ ws = ws_validate_and_send(rh, Request(self.wss_base_url))
assert ws.status == 101
ws.close()
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_ssl_error(self, handler):
with handler(verify=False) as rh:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
- validate_and_send(rh, Request(self.bad_wss_host))
+ ws_validate_and_send(rh, Request(self.bad_wss_host))
assert not issubclass(exc_info.type, CertificateVerifyError)
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('path,expected', [
# Unicode characters should be encoded with uppercase percent-encoding
('/中文', '/%E4%B8%AD%E6%96%87'),
@@ -163,18 +174,17 @@ class TestWebsSocketRequestHandlerConformance:
])
def test_percent_encode(self, handler, path, expected):
with handler() as rh:
- ws = validate_and_send(rh, Request(f'{self.ws_base_url}{path}'))
+ ws = ws_validate_and_send(rh, Request(f'{self.ws_base_url}{path}'))
ws.send('path')
assert ws.recv() == expected
assert ws.status == 101
ws.close()
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_remove_dot_segments(self, handler):
with handler() as rh:
# This isn't a comprehensive test,
# but it should be enough to check whether the handler is removing dot segments
- ws = validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test'))
+ ws = ws_validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test'))
assert ws.status == 101
ws.send('path')
assert ws.recv() == '/test'
@@ -182,25 +192,37 @@ class TestWebsSocketRequestHandlerConformance:
# We are restricted to known HTTP status codes in http.HTTPStatus
# Redirects are not supported for websockets
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
def test_raise_http_error(self, handler, status):
with handler() as rh:
with pytest.raises(HTTPError) as exc_info:
- validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
+ ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
assert exc_info.value.status == status
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('params,extensions', [
({'timeout': sys.float_info.min}, {}),
({}, {'timeout': sys.float_info.min}),
])
- def test_timeout(self, handler, params, extensions):
+ def test_read_timeout(self, handler, params, extensions):
with handler(**params) as rh:
with pytest.raises(TransportError):
- validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
+ ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
+
+ def test_connect_timeout(self, handler):
+ # nothing should be listening on this port
+ connect_timeout_url = 'ws://10.255.255.255'
+ with handler(timeout=0.01) as rh, pytest.raises(TransportError):
+ now = time.time()
+ ws_validate_and_send(rh, Request(connect_timeout_url))
+ assert time.time() - now < DEFAULT_TIMEOUT
+
+ # Per request timeout, should override handler timeout
+ request = Request(connect_timeout_url, extensions={'timeout': 0.01})
+ with handler() as rh, pytest.raises(TransportError):
+ now = time.time()
+ ws_validate_and_send(rh, request)
+ assert time.time() - now < DEFAULT_TIMEOUT
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_cookies(self, handler):
cookiejar = YoutubeDLCookieJar()
cookiejar.set_cookie(http.cookiejar.Cookie(
@@ -210,52 +232,49 @@ class TestWebsSocketRequestHandlerConformance:
comment_url=None, rest={}))
with handler(cookiejar=cookiejar) as rh:
- ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url))
ws.send('headers')
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
ws.close()
with handler() as rh:
- ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url))
ws.send('headers')
assert 'cookie' not in json.loads(ws.recv())
ws.close()
- ws = validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar}))
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar}))
ws.send('headers')
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
ws.close()
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_source_address(self, handler):
source_address = f'127.0.0.{random.randint(5, 255)}'
verify_address_availability(source_address)
with handler(source_address=source_address) as rh:
- ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url))
ws.send('source_address')
assert source_address == ws.recv()
ws.close()
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_response_url(self, handler):
with handler() as rh:
url = f'{self.ws_base_url}/something'
- ws = validate_and_send(rh, Request(url))
+ ws = ws_validate_and_send(rh, Request(url))
assert ws.url == url
ws.close()
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_request_headers(self, handler):
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
# Global Headers
- ws = validate_and_send(rh, Request(self.ws_base_url))
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url))
ws.send('headers')
headers = HTTPHeaderDict(json.loads(ws.recv()))
assert headers['test1'] == 'test'
ws.close()
# Per request headers, merged with global
- ws = validate_and_send(rh, Request(
+ ws = ws_validate_and_send(rh, Request(
self.ws_base_url, headers={'test2': 'changed', 'test3': 'test3'}))
ws.send('headers')
headers = HTTPHeaderDict(json.loads(ws.recv()))
@@ -280,7 +299,6 @@ class TestWebsSocketRequestHandlerConformance:
'client_certificate_password': 'foobar',
}
))
- @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_mtls(self, handler, client_cert):
with handler(
# Disable client-side validation of unacceptable self-signed testcert.pem
@@ -288,7 +306,45 @@ class TestWebsSocketRequestHandlerConformance:
verify=False,
client_cert=client_cert
) as rh:
- validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
+ ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
+
+ def test_request_disable_proxy(self, handler):
+ for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
+ # Given handler is configured with a proxy
+ with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
+ # When a proxy is explicitly set to None for the request
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'http': None}))
+ # Then no proxy should be used
+ assert ws.status == 101
+ ws.close()
+
+ @pytest.mark.skip_handlers_if(
+ lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
+ def test_noproxy(self, handler):
+ for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
+ # Given the handler is configured with a proxy
+ with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
+ for no_proxy in (f'127.0.0.1:{self.ws_port}', '127.0.0.1', 'localhost'):
+ # When request no proxy includes the request url host
+ ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'no': no_proxy}))
+ # Then the proxy should not be used
+ assert ws.status == 101
+ ws.close()
+
+ @pytest.mark.skip_handlers_if(
+ lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
+ def test_allproxy(self, handler):
+ supported_proto = traverse_obj(handler._SUPPORTED_PROXY_SCHEMES, 0, default='ws')
+ # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
+ # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
+ with handler(proxies={'all': f'{supported_proto}://10.255.255.255'}, timeout=0.1) as rh:
+ with pytest.raises(TransportError):
+ ws_validate_and_send(rh, Request(self.ws_base_url)).close()
+
+ with handler(timeout=0.1) as rh:
+ with pytest.raises(TransportError):
+ ws_validate_and_send(
+ rh, Request(self.ws_base_url, proxies={'all': f'{supported_proto}://10.255.255.255'})).close()
def create_fake_ws_connection(raised):
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index c34d97b..2c6f695 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1,7 +1,7 @@
import collections
import contextlib
import copy
-import datetime
+import datetime as dt
import errno
import fileinput
import http.cookiejar
@@ -25,7 +25,7 @@ import unicodedata
from .cache import Cache
from .compat import functools, urllib # isort: split
-from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
+from .compat import compat_os_name, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
@@ -42,6 +42,7 @@ from .networking.exceptions import (
SSLError,
network_exceptions,
)
+from .networking.impersonate import ImpersonateRequestHandler
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
@@ -99,8 +100,8 @@ from .utils import (
SameFileError,
UnavailableVideoError,
UserNotLive,
+ YoutubeDLError,
age_restricted,
- args_to_str,
bug_reports_message,
date_from_str,
deprecation_warning,
@@ -139,11 +140,13 @@ from .utils import (
sanitize_filename,
sanitize_path,
sanitize_url,
+ shell_quote,
str_or_none,
strftime_or_none,
subtitles_filename,
supports_terminal_sequences,
system_identifier,
+ filesize_from_tbr,
timetuple_from_msec,
to_high_limit_path,
traverse_obj,
@@ -402,6 +405,8 @@ class YoutubeDL:
- "detect_or_warn": check whether we can do anything
about it, warn otherwise (default)
source_address: Client-side IP address to bind to.
+ impersonate: Client to impersonate for requests.
+ An ImpersonateTarget (from yt_dlp.networking.impersonate)
sleep_interval_requests: Number of seconds to sleep between requests
during extraction
sleep_interval: Number of seconds to sleep before each download when
@@ -476,7 +481,7 @@ class YoutubeDL:
nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
- external_downloader_args, concurrent_fragment_downloads.
+ external_downloader_args, concurrent_fragment_downloads, progress_delta.
The following options are used by the post processors:
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
@@ -713,6 +718,13 @@ class YoutubeDL:
for msg in self.params.get('_deprecation_warnings', []):
self.deprecated_feature(msg)
+ if impersonate_target := self.params.get('impersonate'):
+ if not self._impersonate_target_available(impersonate_target):
+ raise YoutubeDLError(
+ f'Impersonate target "{impersonate_target}" is not available. '
+ f'Use --list-impersonate-targets to see available targets. '
+ f'You may be missing dependencies required to support this target.')
+
if 'list-formats' in self.params['compat_opts']:
self.params['listformats_table'] = False
@@ -811,7 +823,7 @@ class YoutubeDL:
self.report_warning(
'Long argument string detected. '
'Use -- to separate parameters and URLs, like this:\n%s' %
- args_to_str(correct_argv))
+ shell_quote(correct_argv))
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
@@ -1343,7 +1355,7 @@ class YoutubeDL:
value, fmt = escapeHTML(str(value)), str_fmt
elif fmt[-1] == 'q': # quoted
value = map(str, variadic(value) if '#' in flags else [value])
- value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
+ value, fmt = shell_quote(value, shell=True), str_fmt
elif fmt[-1] == 'B': # bytes
value = f'%{str_fmt}'.encode() % str(value).encode()
value, fmt = value.decode('utf-8', 'ignore'), 's'
@@ -2124,6 +2136,11 @@ class YoutubeDL:
def _check_formats(self, formats):
for f in formats:
+ working = f.get('__working')
+ if working is not None:
+ if working:
+ yield f
+ continue
self.to_screen('[info] Testing format %s' % f['format_id'])
path = self.get_output_path('temp')
if not self._ensure_dir_exists(f'{path}/'):
@@ -2140,33 +2157,44 @@ class YoutubeDL:
os.remove(temp_file.name)
except OSError:
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+ f['__working'] = success
if success:
yield f
else:
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+ def _select_formats(self, formats, selector):
+ return list(selector({
+ 'formats': formats,
+ 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+ 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
+ or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
+ }))
+
def _default_format_spec(self, info_dict, download=True):
+ download = download and not self.params.get('simulate')
+ prefer_best = download and (
+ self.params['outtmpl']['default'] == '-'
+ or info_dict.get('is_live') and not self.params.get('live_from_start'))
def can_merge():
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
- prefer_best = (
- not self.params.get('simulate')
- and download
- and (
- not can_merge()
- or info_dict.get('is_live') and not self.params.get('live_from_start')
- or self.params['outtmpl']['default'] == '-'))
- compat = (
- prefer_best
- or self.params.get('allow_multiple_audio_streams', False)
- or 'format-spec' in self.params['compat_opts'])
-
- return (
- 'best/bestvideo+bestaudio' if prefer_best
- else 'bestvideo*+bestaudio/best' if not compat
- else 'bestvideo+bestaudio/best')
+ if not prefer_best and download and not can_merge():
+ prefer_best = True
+ formats = self._get_formats(info_dict)
+ evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
+ if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
+ self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
+ 'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
+
+ compat = (self.params.get('allow_multiple_audio_streams')
+ or 'format-spec' in self.params['compat_opts'])
+
+ return ('best/bestvideo+bestaudio' if prefer_best
+ else 'bestvideo+bestaudio/best' if compat
+ else 'bestvideo*+bestaudio/best')
def build_format_selector(self, format_spec):
def syntax_error(note, start):
@@ -2617,7 +2645,7 @@ class YoutubeDL:
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
# see http://bugs.python.org/issue1646728)
with contextlib.suppress(ValueError, OverflowError, OSError):
- upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
+ upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
info_dict[date_key] = upload_date.strftime('%Y%m%d')
if not info_dict.get('release_year'):
@@ -2771,7 +2799,7 @@ class YoutubeDL:
get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
if not get_from_start:
- info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+ info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
if info_dict.get('is_live') and formats:
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
if get_from_start and not formats:
@@ -2802,6 +2830,9 @@ class YoutubeDL:
format['url'] = sanitize_url(format['url'])
if format.get('ext') is None:
format['ext'] = determine_ext(format['url']).lower()
+ if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
+ if format.get('acodec') is None:
+ format['acodec'] = format['ext']
if format.get('protocol') is None:
format['protocol'] = determine_protocol(format)
if format.get('resolution') is None:
@@ -2812,9 +2843,8 @@ class YoutubeDL:
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
# For fragmented formats, "tbr" is often max bitrate and not average
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
- and info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
- format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
+ format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration'))
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
# Safeguard against old/insecure infojson when using --load-info-json
@@ -2914,12 +2944,7 @@ class YoutubeDL:
self.write_debug(f'Default format spec: {req_format}')
format_selector = self.build_format_selector(req_format)
- formats_to_download = list(format_selector({
- 'formats': formats,
- 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
- 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
- or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
- }))
+ formats_to_download = self._select_formats(formats, format_selector)
if interactive_format_selection and not formats_to_download:
self.report_error('Requested format is not available', tb=False, is_error=False)
continue
@@ -3046,7 +3071,7 @@ class YoutubeDL:
f = formats[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
- 'using %s' % (formats_query, lang, f['ext']))
+ 'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
subs[lang] = f
return subs
@@ -3864,8 +3889,8 @@ class YoutubeDL:
delim, (
format_field(f, 'filesize', ' \t%s', func=format_bytes)
or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
- or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
- None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
+ or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
+ self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
format_field(f, 'tbr', '\t%dk', func=round),
shorten_protocol_name(f.get('protocol', '')),
delim,
@@ -4077,6 +4102,22 @@ class YoutubeDL:
handler = self._request_director.handlers['Urllib']
return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
+ def _get_available_impersonate_targets(self):
+ # todo(future): make available as public API
+ return [
+ (target, rh.RH_NAME)
+ for rh in self._request_director.handlers.values()
+ if isinstance(rh, ImpersonateRequestHandler)
+ for target in rh.supported_targets
+ ]
+
+ def _impersonate_target_available(self, target):
+ # todo(future): make available as public API
+ return any(
+ rh.is_supported_target(target)
+ for rh in self._request_director.handlers.values()
+ if isinstance(rh, ImpersonateRequestHandler))
+
def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, str):
@@ -4108,9 +4149,13 @@ class YoutubeDL:
raise RequestError(
'file:// URLs are disabled by default in yt-dlp for security reasons. '
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
- if 'unsupported proxy type: "https"' in ue.msg.lower():
+ if (
+ 'unsupported proxy type: "https"' in ue.msg.lower()
+ and 'requests' not in self._request_director.handlers
+ and 'curl_cffi' not in self._request_director.handlers
+ ):
raise RequestError(
- 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
+ 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')
elif (
re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
@@ -4120,6 +4165,13 @@ class YoutubeDL:
'This request requires WebSocket support. '
'Ensure one of the following dependencies are installed: websockets',
cause=ue) from ue
+
+ elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()):
+ raise RequestError(
+ f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
+ f' See --list-impersonate-targets for available targets.'
+ f' This request requires browser impersonation, however you may be missing dependencies'
+ f' required to support this target.')
raise
except SSLError as e:
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
@@ -4152,6 +4204,7 @@ class YoutubeDL:
'timeout': 'socket_timeout',
'legacy_ssl_support': 'legacyserverconnect',
'enable_file_urls': 'enable_file_urls',
+ 'impersonate': 'impersonate',
'client_cert': {
'client_certificate': 'client_certificate',
'client_certificate_key': 'client_certificate_key',
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index aeea262..3d606bc 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -19,6 +19,7 @@ from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes
from .extractor.adobepass import MSO_INFO
+from .networking.impersonate import ImpersonateTarget
from .options import parseOpts
from .postprocessor import (
FFmpegExtractAudioPP,
@@ -48,6 +49,7 @@ from .utils import (
float_or_none,
format_field,
int_or_none,
+ join_nonempty,
match_filter_func,
parse_bytes,
parse_duration,
@@ -388,6 +390,9 @@ def validate_options(opts):
f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
+ if opts.impersonate is not None:
+ opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower())
+
# MetadataParser
def metadataparser_actions(f):
if isinstance(f, str):
@@ -831,6 +836,7 @@ def parse_options(argv=None):
'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
'progress_with_newline': opts.progress_with_newline,
'progress_template': opts.progress_template,
+ 'progress_delta': opts.progress_delta,
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
'playlistreverse': opts.playlist_reverse,
@@ -911,6 +917,7 @@ def parse_options(argv=None):
'postprocessors': postprocessors,
'fixup': opts.fixup,
'source_address': opts.source_address,
+ 'impersonate': opts.impersonate,
'call_home': opts.call_home,
'sleep_interval_requests': opts.sleep_interval_requests,
'sleep_interval': opts.sleep_interval,
@@ -980,6 +987,41 @@ def _real_main(argv=None):
traceback.print_exc()
ydl._download_retcode = 100
+ if opts.list_impersonate_targets:
+
+ known_targets = [
+ # List of simplified targets we know are supported,
+ # to help users know what dependencies may be required.
+ (ImpersonateTarget('chrome'), 'curl_cffi'),
+ (ImpersonateTarget('edge'), 'curl_cffi'),
+ (ImpersonateTarget('safari'), 'curl_cffi'),
+ ]
+
+ available_targets = ydl._get_available_impersonate_targets()
+
+ def make_row(target, handler):
+ return [
+ join_nonempty(target.client.title(), target.version, delim='-') or '-',
+ join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-',
+ handler,
+ ]
+
+ rows = [make_row(target, handler) for target, handler in available_targets]
+
+ for known_target, known_handler in known_targets:
+ if not any(
+ known_target in target and handler == known_handler
+ for target, handler in available_targets
+ ):
+ rows.append([
+ ydl._format_out(text, ydl.Styles.SUPPRESS)
+ for text in make_row(known_target, f'{known_handler} (not available)')
+ ])
+
+ ydl.to_screen('[info] Available impersonate targets')
+ ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-'))
+ return
+
if not actual_use:
if pre_process:
return ydl._download_retcode
diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py
index 7c3dbfb..8e7f42f 100644
--- a/yt_dlp/__pyinstaller/hook-yt_dlp.py
+++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py
@@ -1,6 +1,6 @@
import sys
-from PyInstaller.utils.hooks import collect_submodules
+from PyInstaller.utils.hooks import collect_submodules, collect_data_files
def pycryptodome_module():
@@ -25,10 +25,12 @@ def get_hidden_imports():
for module in ('websockets', 'requests', 'urllib3'):
yield from collect_submodules(module)
# These are auto-detected, but explicitly add them just in case
- yield from ('mutagen', 'brotli', 'certifi', 'secretstorage')
+ yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi')
hiddenimports = list(get_hidden_imports())
print(f'Adding imports: {hiddenimports}')
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
+
+datas = collect_data_files('curl_cffi', includes=['cacert.pem'])
diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py
index 5ad5c70..d820ada 100644
--- a/yt_dlp/compat/__init__.py
+++ b/yt_dlp/compat/__init__.py
@@ -27,12 +27,9 @@ def compat_etree_fromstring(text):
compat_os_name = os._name if os.name == 'java' else os.name
-if compat_os_name == 'nt':
- def compat_shlex_quote(s):
- import re
- return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
-else:
- from shlex import quote as compat_shlex_quote # noqa: F401
+def compat_shlex_quote(s):
+ from ..utils import shell_quote
+ return shell_quote(s)
def compat_ord(c):
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 28d174a..815897d 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -1,6 +1,7 @@
import base64
import collections
import contextlib
+import datetime as dt
import glob
import http.cookiejar
import http.cookies
@@ -15,7 +16,6 @@ import sys
import tempfile
import time
import urllib.request
-from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac
@@ -46,7 +46,7 @@ from .utils import (
from .utils._utils import _YDLLogger
from .utils.networking import normalize_url
-CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
+CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@@ -194,7 +194,11 @@ def _firefox_browser_dirs():
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
else:
- yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
+ yield from map(os.path.expanduser, (
+ '~/.mozilla/firefox',
+ '~/snap/firefox/common/.mozilla/firefox',
+ '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
+ ))
def _firefox_cookie_dbs(roots):
@@ -215,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
+ 'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
}[browser_name]
elif sys.platform == 'darwin':
@@ -226,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata, 'Microsoft Edge'),
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
'vivaldi': os.path.join(appdata, 'Vivaldi'),
+ 'whale': os.path.join(appdata, 'Naver/Whale'),
}[browser_name]
else:
@@ -237,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(config, 'microsoft-edge'),
'opera': os.path.join(config, 'opera'),
'vivaldi': os.path.join(config, 'vivaldi'),
+ 'whale': os.path.join(config, 'naver-whale'),
}[browser_name]
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
@@ -248,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
+ 'whale': 'Whale',
}[browser_name]
browsers_without_profiles = {'opera'}
@@ -343,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
if value is None:
return is_encrypted, None
+ # In chrome, session cookies have expires_utc set to 0
+ # In our cookie-store, cookies that do not expire should have expires set to None
+ if not expires_utc:
+ expires_utc = None
+
return is_encrypted, http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
@@ -594,7 +607,7 @@ class DataParser:
def _mac_absolute_time_to_posix(timestamp):
- return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
+ return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
def _parse_safari_cookies_header(data, logger):
diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py
index 9e3f907..0d58da2 100644
--- a/yt_dlp/dependencies/__init__.py
+++ b/yt_dlp/dependencies/__init__.py
@@ -74,6 +74,10 @@ else:
if hasattr(xattr, 'set'): # pyxattr
xattr._yt_dlp__identifier = 'pyxattr'
+try:
+ import curl_cffi
+except ImportError:
+ curl_cffi = None
from . import Cryptodome
diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
index b71d7ee..65a0d6f 100644
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -4,6 +4,7 @@ import functools
import os
import random
import re
+import threading
import time
from ..minicurses import (
@@ -63,6 +64,7 @@ class FileDownloader:
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+ progress_delta: The minimum time between progress output, in seconds
external_downloader_args: A dictionary of downloader keys (in lower case)
and a list of additional command-line arguments for the
executable. Use 'default' as the name for arguments to be
@@ -88,6 +90,9 @@ class FileDownloader:
self.params = params
self._prepare_multiline_status()
self.add_progress_hook(self.report_progress)
+ if self.params.get('progress_delta'):
+ self._progress_delta_lock = threading.Lock()
+ self._progress_delta_time = time.monotonic()
def _set_ydl(self, ydl):
self.ydl = ydl
@@ -366,6 +371,12 @@ class FileDownloader:
if s['status'] != 'downloading':
return
+ if update_delta := self.params.get('progress_delta'):
+ with self._progress_delta_lock:
+ if time.monotonic() < self._progress_delta_time:
+ return
+ self._progress_delta_time += update_delta
+
s.update({
'_eta_str': self.format_eta(s.get('eta')).strip(),
'_speed_str': self.format_speed(s.get('speed')),
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index ce5eeb0..8b0b94e 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -491,7 +491,7 @@ class FFmpegFD(ExternalFD):
if not self.params.get('verbose'):
args += ['-hide_banner']
- args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
+ args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
# These exists only for compatibility. Extractors should use
# info_dict['downloader_options']['ffmpeg_args'] instead
@@ -615,6 +615,8 @@ class FFmpegFD(ExternalFD):
else:
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+ args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
+
args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args]
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index c753655..e9cd38a 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1,4 +1,5 @@
# flake8: noqa: F401
+# isort: off
from .youtube import ( # Youtube is moved to the top to improve performance
YoutubeIE,
@@ -24,6 +25,8 @@ from .youtube import ( # Youtube is moved to the top to improve performance
YoutubeConsentRedirectIE,
)
+# isort: on
+
from .abc import (
ABCIE,
ABCIViewIE,
@@ -43,27 +46,33 @@ from .abematv import (
)
from .academicearth import AcademicEarthCourseIE
from .acast import (
- ACastIE,
ACastChannelIE,
+ ACastIE,
+)
+from .acfun import (
+ AcFunBangumiIE,
+ AcFunVideoIE,
+)
+from .adn import (
+ ADNIE,
+ ADNSeasonIE,
)
-from .acfun import AcFunVideoIE, AcFunBangumiIE
-from .adn import ADNIE, ADNSeasonIE
from .adobeconnect import AdobeConnectIE
from .adobetv import (
+ AdobeTVChannelIE,
AdobeTVEmbedIE,
AdobeTVIE,
AdobeTVShowIE,
- AdobeTVChannelIE,
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
from .aenetworks import (
- AENetworksIE,
AENetworksCollectionIE,
+ AENetworksIE,
AENetworksShowIE,
- HistoryTopicIE,
- HistoryPlayerIE,
BiographyIE,
+ HistoryPlayerIE,
+ HistoryTopicIE,
)
from .aeonco import AeonCoIE
from .afreecatv import (
@@ -79,77 +88,86 @@ from .agora import (
)
from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
+from .aliexpress import AliExpressLiveIE
from .aljazeera import AlJazeeraIE
+from .allocine import AllocineIE
from .allstar import (
AllstarIE,
AllstarProfileIE,
)
from .alphaporno import AlphaPornoIE
+from .alsace20tv import (
+ Alsace20TVEmbedIE,
+ Alsace20TVIE,
+)
from .altcensored import (
- AltCensoredIE,
AltCensoredChannelIE,
+ AltCensoredIE,
)
from .alura import (
+ AluraCourseIE,
AluraIE,
- AluraCourseIE
)
from .amadeustv import AmadeusTVIE
from .amara import AmaraIE
-from .amcnetworks import AMCNetworksIE
from .amazon import (
- AmazonStoreIE,
AmazonReviewsIE,
+ AmazonStoreIE,
)
from .amazonminitv import (
AmazonMiniTVIE,
AmazonMiniTVSeasonIE,
AmazonMiniTVSeriesIE,
)
+from .amcnetworks import AMCNetworksIE
from .americastestkitchen import (
AmericasTestKitchenIE,
AmericasTestKitchenSeasonIE,
)
from .anchorfm import AnchorFMEpisodeIE
from .angel import AngelIE
+from .antenna import (
+ Ant1NewsGrArticleIE,
+ Ant1NewsGrEmbedIE,
+ AntennaGrWatchIE,
+)
from .anvato import AnvatoIE
from .aol import AolIE
-from .allocine import AllocineIE
-from .aliexpress import AliExpressLiveIE
-from .alsace20tv import (
- Alsace20TVIE,
- Alsace20TVEmbedIE,
-)
from .apa import APAIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
+from .applepodcasts import ApplePodcastsIE
from .appletrailers import (
AppleTrailersIE,
AppleTrailersSectionIE,
)
-from .applepodcasts import ApplePodcastsIE
from .archiveorg import (
ArchiveOrgIE,
YoutubeWebArchiveIE,
)
from .arcpublishing import ArcPublishingIE
-from .arkena import ArkenaIE
from .ard import (
+ ARDIE,
ARDBetaMediathekIE,
ARDMediathekCollectionIE,
- ARDIE,
)
+from .arkena import ArkenaIE
+from .arnes import ArnesIE
from .art19 import (
Art19IE,
Art19ShowIE,
)
from .arte import (
- ArteTVIE,
+ ArteTVCategoryIE,
ArteTVEmbedIE,
+ ArteTVIE,
ArteTVPlaylistIE,
- ArteTVCategoryIE,
)
-from .arnes import ArnesIE
-from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
+from .asobichannel import (
+ AsobiChannelIE,
+ AsobiChannelTagURLIE,
+)
+from .asobistage import AsobiStageIE
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
from .atvat import ATVAtIE
@@ -159,57 +177,60 @@ from .audiodraft import (
AudiodraftCustomIE,
AudiodraftGenericIE,
)
-from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .audiomack import (
+ AudiomackAlbumIE,
+ AudiomackIE,
+)
from .audius import (
AudiusIE,
- AudiusTrackIE,
AudiusPlaylistIE,
AudiusProfileIE,
+ AudiusTrackIE,
)
from .awaan import (
AWAANIE,
- AWAANVideoIE,
AWAANLiveIE,
AWAANSeasonIE,
+ AWAANVideoIE,
)
from .axs import AxsIE
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
from .banbye import (
- BanByeIE,
BanByeChannelIE,
+ BanByeIE,
)
from .bandaichannel import BandaiChannelIE
from .bandcamp import (
- BandcampIE,
BandcampAlbumIE,
- BandcampWeeklyIE,
+ BandcampIE,
BandcampUserIE,
+ BandcampWeeklyIE,
)
from .bannedvideo import BannedVideoIE
from .bbc import (
- BBCCoUkIE,
+ BBCIE,
BBCCoUkArticleIE,
+ BBCCoUkIE,
BBCCoUkIPlayerEpisodesIE,
BBCCoUkIPlayerGroupIE,
BBCCoUkPlaylistIE,
- BBCIE,
)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .bellmedia import BellMediaIE
from .beatbump import (
- BeatBumpVideoIE,
BeatBumpPlaylistIE,
+ BeatBumpVideoIE,
)
from .beatport import BeatportIE
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
from .berufetv import BerufeTVIE
from .bet import BetIE
from .bfi import BFIPlayerIE
from .bfmtv import (
BFMTVIE,
- BFMTVLiveIE,
BFMTVArticleIE,
+ BFMTVLiveIE,
)
from .bibeltv import (
BibelTVLiveIE,
@@ -220,37 +241,37 @@ from .bigflix import BigflixIE
from .bigo import BigoIE
from .bild import BildIE
from .bilibili import (
- BiliBiliIE,
+ BilibiliAudioAlbumIE,
+ BilibiliAudioIE,
BiliBiliBangumiIE,
- BiliBiliBangumiSeasonIE,
BiliBiliBangumiMediaIE,
+ BiliBiliBangumiSeasonIE,
+ BilibiliCategoryIE,
BilibiliCheeseIE,
BilibiliCheeseSeasonIE,
- BiliBiliSearchIE,
- BilibiliCategoryIE,
- BilibiliAudioIE,
- BilibiliAudioAlbumIE,
- BiliBiliPlayerIE,
- BilibiliSpaceVideoIE,
- BilibiliSpaceAudioIE,
BilibiliCollectionListIE,
- BilibiliSeriesListIE,
BilibiliFavoritesListIE,
- BilibiliWatchlaterIE,
+ BiliBiliIE,
+ BiliBiliPlayerIE,
BilibiliPlaylistIE,
+ BiliBiliSearchIE,
+ BilibiliSeriesListIE,
+ BilibiliSpaceAudioIE,
+ BilibiliSpaceVideoIE,
+ BilibiliWatchlaterIE,
BiliIntlIE,
BiliIntlSeriesIE,
BiliLiveIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
- BitChuteIE,
BitChuteChannelIE,
+ BitChuteIE,
)
from .blackboardcollaborate import BlackboardCollaborateIE
from .bleacherreport import (
- BleacherReportIE,
BleacherReportCMSIE,
+ BleacherReportIE,
)
from .blerp import BlerpIE
from .blogger import BloggerIE
@@ -263,69 +284,69 @@ from .box import BoxIE
from .boxcast import BoxCastVideoIE
from .bpb import BpbIE
from .br import BRIE
-from .bravotv import BravoTVIE
from .brainpop import (
- BrainPOPIE,
- BrainPOPJrIE,
BrainPOPELLIE,
BrainPOPEspIE,
BrainPOPFrIE,
+ BrainPOPIE,
BrainPOPIlIE,
+ BrainPOPJrIE,
)
+from .bravotv import BravoTVIE
from .breitbart import BreitBartIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .brilliantpala import (
- BrilliantpalaElearnIE,
BrilliantpalaClassesIE,
+ BrilliantpalaElearnIE,
)
-from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .bundestag import BundestagIE
+from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
-from .cableav import CableAVIE
+from .caffeinetv import CaffeineTVIE
from .callin import CallinIE
from .caltrans import CaltransIE
from .cam4 import CAM4IE
from .camdemy import (
+ CamdemyFolderIE,
CamdemyIE,
- CamdemyFolderIE
)
from .camfm import (
CamFMEpisodeIE,
- CamFMShowIE
+ CamFMShowIE,
)
from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
-from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
+from .canalplus import CanalplusIE
from .caracoltv import CaracolTvPlayIE
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
- CBCPlayerIE,
- CBCPlayerPlaylistIE,
CBCGemIE,
- CBCGemPlaylistIE,
CBCGemLiveIE,
+ CBCGemPlaylistIE,
+ CBCPlayerIE,
+ CBCPlayerPlaylistIE,
)
from .cbs import (
CBSIE,
ParamountPressExpressIE,
)
from .cbsnews import (
- CBSNewsEmbedIE,
- CBSNewsIE,
- CBSLocalIE,
CBSLocalArticleIE,
+ CBSLocalIE,
CBSLocalLiveIE,
+ CBSNewsEmbedIE,
+ CBSNewsIE,
CBSNewsLiveIE,
CBSNewsLiveVideoIE,
)
@@ -354,12 +375,12 @@ from .chzzk import (
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .cineverse import (
- CineverseIE,
CineverseDetailsIE,
+ CineverseIE,
)
from .ciscolive import (
- CiscoLiveSessionIE,
CiscoLiveSearchIE,
+ CiscoLiveSessionIE,
)
from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
@@ -372,21 +393,22 @@ from .cloudycdn import CloudyCDNIE
from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
-from .cnbc import (
- CNBCVideoIE,
-)
+from .cnbc import CNBCVideoIE
from .cnn import (
CNNIE,
- CNNBlogsIE,
CNNArticleIE,
+ CNNBlogsIE,
CNNIndonesiaIE,
)
-from .coub import CoubIE
from .comedycentral import (
ComedyCentralIE,
ComedyCentralTVIE,
)
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonmistakes import (
+ BlobIE,
+ CommonMistakesIE,
+ UnicodeBOMIE,
+)
from .commonprotocols import (
MmsIE,
RtmpIE,
@@ -395,44 +417,48 @@ from .commonprotocols import (
from .condenast import CondeNastIE
from .contv import CONtvIE
from .corus import CorusIE
+from .coub import CoubIE
+from .cozytv import CozyTVIE
from .cpac import (
CPACIE,
CPACPlaylistIE,
)
-from .cozytv import CozyTVIE
from .cracked import CrackedIE
from .crackle import CrackleIE
from .craftsy import CraftsyIE
from .crooksandliars import CrooksAndLiarsIE
from .crowdbunker import (
- CrowdBunkerIE,
CrowdBunkerChannelIE,
+ CrowdBunkerIE,
)
from .crtvg import CrtvgIE
from .crunchyroll import (
+ CrunchyrollArtistIE,
CrunchyrollBetaIE,
CrunchyrollBetaShowIE,
CrunchyrollMusicIE,
- CrunchyrollArtistIE,
)
-from .cspan import CSpanIE, CSpanCongressIE
+from .cspan import (
+ CSpanCongressIE,
+ CSpanIE,
+)
from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE
from .curiositystream import (
- CuriosityStreamIE,
CuriosityStreamCollectionsIE,
+ CuriosityStreamIE,
CuriosityStreamSeriesIE,
)
from .cwtv import CWTVIE
from .cybrary import (
+ CybraryCourseIE,
CybraryIE,
- CybraryCourseIE
)
from .dacast import (
- DacastVODIE,
DacastPlaylistIE,
+ DacastVODIE,
)
from .dailymail import DailyMailIE
from .dailymotion import (
@@ -449,9 +475,13 @@ from .damtomo import (
DamtomoRecordIE,
DamtomoVideoIE,
)
+from .dangalplay import (
+ DangalPlayIE,
+ DangalPlaySeasonIE,
+)
from .daum import (
- DaumIE,
DaumClipIE,
+ DaumIE,
DaumPlaylistIE,
DaumUserIE,
)
@@ -459,49 +489,69 @@ from .daystar import DaystarClipIE
from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import (
- DeezerPlaylistIE,
DeezerAlbumIE,
+ DeezerPlaylistIE,
)
from .democracynow import DemocracynowIE
from .detik import DetikEmbedIE
+from .deuxm import (
+ DeuxMIE,
+ DeuxMNewsIE,
+)
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .digitalconcerthall import DigitalConcertHallIE
+from .digiteka import DigitekaIE
+from .discogs import DiscogsReleasePlaylistIE
+from .discovery import DiscoveryIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
from .dlf import (
DLFIE,
DLFCorpusIE,
)
-from .dfb import DFBIE
-from .dhm import DHMIE
+from .dlive import (
+ DLiveStreamIE,
+ DLiveVODIE,
+)
from .douyutv import (
DouyuShowIE,
DouyuTVIE,
)
from .dplay import (
- DPlayIE,
- DiscoveryPlusIE,
- HGTVDeIE,
- GoDiscoveryIE,
- TravelChannelIE,
+ TLCIE,
+ AmHistoryChannelIE,
+ AnimalPlanetIE,
CookingChannelIE,
- HGTVUsaIE,
- FoodNetworkIE,
- InvestigationDiscoveryIE,
DestinationAmericaIE,
- AmHistoryChannelIE,
- ScienceChannelIE,
- DIYNetworkIE,
DiscoveryLifeIE,
- AnimalPlanetIE,
- TLCIE,
- MotorTrendIE,
- MotorTrendOnDemandIE,
- DiscoveryPlusIndiaIE,
DiscoveryNetworksDeIE,
+ DiscoveryPlusIE,
+ DiscoveryPlusIndiaIE,
+ DiscoveryPlusIndiaShowIE,
DiscoveryPlusItalyIE,
DiscoveryPlusItalyShowIE,
- DiscoveryPlusIndiaShowIE,
+ DIYNetworkIE,
+ DPlayIE,
+ FoodNetworkIE,
GlobalCyclingNetworkPlusIE,
+ GoDiscoveryIE,
+ HGTVDeIE,
+ HGTVUsaIE,
+ InvestigationDiscoveryIE,
+ MotorTrendIE,
+ MotorTrendOnDemandIE,
+ ScienceChannelIE,
+ TravelChannelIE,
)
-from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
+from .dreisat import DreiSatIE
+from .drooble import DroobleIE
+from .dropbox import DropboxIE
+from .dropout import (
+ DropoutIE,
+ DropoutSeasonIE,
+)
from .drtuber import DrTuberIE
from .drtv import (
DRTVIE,
@@ -510,32 +560,21 @@ from .drtv import (
DRTVSeriesIE,
)
from .dtube import DTubeIE
-from .dvtv import DVTVIE
from .duboku import (
DubokuIE,
- DubokuPlaylistIE
+ DubokuPlaylistIE,
)
from .dumpert import DumpertIE
-from .deuxm import (
- DeuxMIE,
- DeuxMNewsIE
-)
-from .digitalconcerthall import DigitalConcertHallIE
-from .discogs import DiscogsReleasePlaylistIE
-from .discovery import DiscoveryIE
-from .disney import DisneyIE
-from .dispeak import DigitallySpeakingIE
-from .dropbox import DropboxIE
-from .dropout import (
- DropoutSeasonIE,
- DropoutIE
-)
from .duoplay import DuoplayIE
+from .dvtv import DVTVIE
from .dw import (
DWIE,
DWArticleIE,
)
-from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
+from .eagleplatform import (
+ ClipYouEmbedIE,
+ EaglePlatformIE,
+)
from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE
from .egghead import (
@@ -543,7 +582,6 @@ from .egghead import (
EggheadLessonIE,
)
from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
@@ -560,8 +598,8 @@ from .epoch import EpochIE
from .eporner import EpornerIE
from .erocast import ErocastIE
from .eroprofile import (
- EroProfileIE,
EroProfileAlbumIE,
+ EroProfileIE,
)
from .err import ERRJupiterIE
from .ertgr import (
@@ -571,30 +609,33 @@ from .ertgr import (
)
from .espn import (
ESPNIE,
- WatchESPNIE,
ESPNArticleIE,
- FiveThirtyEightIE,
ESPNCricInfoIE,
+ FiveThirtyEightIE,
+ WatchESPNIE,
)
from .ettutv import EttuTvIE
-from .europa import EuropaIE, EuroParlWebstreamIE
+from .europa import (
+ EuropaIE,
+ EuroParlWebstreamIE,
+)
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
from .euscreen import EUScreenIE
from .expressen import ExpressenIE
from .eyedotv import EyedoTVIE
from .facebook import (
+ FacebookAdsIE,
FacebookIE,
FacebookPluginsVideoIE,
FacebookRedirectURLIE,
FacebookReelIE,
- FacebookAdsIE,
)
from .fancode import (
+ FancodeLiveIE,
FancodeVodIE,
- FancodeLiveIE
)
-
+from .fathom import FathomIE
from .faz import FazIE
from .fc2 import (
FC2IE,
@@ -604,8 +645,8 @@ from .fc2 import (
from .fczenit import FczenitIE
from .fifa import FifaIE
from .filmon import (
- FilmOnIE,
FilmOnChannelIE,
+ FilmOnIE,
)
from .filmweb import FilmwebIE
from .firsttv import FirstTVIE
@@ -613,17 +654,17 @@ from .fivetv import FiveTVIE
from .flextv import FlexTVIE
from .flickr import FlickrIE
from .floatplane import (
- FloatplaneIE,
FloatplaneChannelIE,
+ FloatplaneIE,
)
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
from .fourtube import (
FourTubeIE,
- PornTubeIE,
- PornerBrosIE,
FuxIE,
+ PornerBrosIE,
+ PornTubeIE,
)
from .fox import FOXIE
from .fox9 import (
@@ -631,8 +672,8 @@ from .fox9 import (
FOX9NewsIE,
)
from .foxnews import (
- FoxNewsIE,
FoxNewsArticleIE,
+ FoxNewsIE,
FoxNewsVideoIE,
)
from .foxsports import FoxSportsIE
@@ -640,20 +681,20 @@ from .fptplay import FptplayIE
from .franceinter import FranceInterIE
from .francetv import (
FranceTVIE,
- FranceTVSiteIE,
FranceTVInfoIE,
+ FranceTVSiteIE,
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
-from .frontendmasters import (
- FrontendMastersIE,
- FrontendMastersLessonIE,
- FrontendMastersCourseIE
-)
from .freetv import (
FreeTvIE,
FreeTvMoviesIE,
)
+from .frontendmasters import (
+ FrontendMastersCourseIE,
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+)
from .fujitv import FujiTVFODPlus7IE
from .funimation import (
FunimationIE,
@@ -664,32 +705,37 @@ from .funk import FunkIE
from .funker530 import Funker530IE
from .fuyintv import FuyinTVIE
from .gab import (
- GabTVIE,
GabIE,
+ GabTVIE,
)
from .gaia import GaiaIE
from .gamejolt import (
- GameJoltIE,
- GameJoltUserIE,
+ GameJoltCommunityIE,
GameJoltGameIE,
GameJoltGameSoundtrackIE,
- GameJoltCommunityIE,
+ GameJoltIE,
GameJoltSearchIE,
+ GameJoltUserIE,
)
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gaskrank import GaskrankIE
from .gazeta import GazetaIE
+from .gbnews import GBNewsIE
from .gdcvault import GDCVaultIE
from .gedidigital import GediDigitalIE
from .generic import GenericIE
+from .genericembeds import (
+ HTML5MediaEmbedIE,
+ QuotedHTMLIE,
+)
from .genius import (
GeniusIE,
GeniusLyricsIE,
)
from .getcourseru import (
+ GetCourseRuIE,
GetCourseRuPlayerIE,
- GetCourseRuIE
)
from .gettr import (
GettrIE,
@@ -698,40 +744,45 @@ from .gettr import (
from .giantbomb import GiantBombIE
from .glide import GlideIE
from .globalplayer import (
+ GlobalPlayerAudioEpisodeIE,
+ GlobalPlayerAudioIE,
GlobalPlayerLiveIE,
GlobalPlayerLivePlaylistIE,
- GlobalPlayerAudioIE,
- GlobalPlayerAudioEpisodeIE,
- GlobalPlayerVideoIE
+ GlobalPlayerVideoIE,
)
from .globo import (
- GloboIE,
GloboArticleIE,
+ GloboIE,
+)
+from .glomex import (
+ GlomexEmbedIE,
+ GlomexIE,
)
from .gmanetwork import GMANetworkVideoIE
from .go import GoIE
+from .godresource import GodResourceIE
from .godtube import GodTubeIE
from .gofile import GofileIE
from .golem import GolemIE
from .goodgame import GoodGameIE
from .googledrive import (
- GoogleDriveIE,
GoogleDriveFolderIE,
+ GoogleDriveIE,
)
from .googlepodcasts import (
- GooglePodcastsIE,
GooglePodcastsFeedIE,
+ GooglePodcastsIE,
)
from .googlesearch import GoogleSearchIE
-from .gopro import GoProIE
from .goplay import GoPlayIE
+from .gopro import GoProIE
from .goshgay import GoshgayIE
from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE
from .gronkh import (
- GronkhIE,
GronkhFeedIE,
- GronkhVodsIE
+ GronkhIE,
+ GronkhVodsIE,
)
from .groupon import GrouponIE
from .harpodeon import HarpodeonIE
@@ -740,10 +791,10 @@ from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .hgtv import HGTVComShowIE
-from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitrecord import HitRecordIE
+from .hketv import HKETVIE
from .hollywoodreporter import (
HollywoodReporterIE,
HollywoodReporterPlaylistIE,
@@ -752,8 +803,8 @@ from .holodex import HolodexIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import (
HotStarIE,
- HotStarPrefixIE,
HotStarPlaylistIE,
+ HotStarPrefixIE,
HotStarSeasonIE,
HotStarSeriesIE,
)
@@ -764,34 +815,30 @@ from .hrti import (
HRTiPlaylistIE,
)
from .hse import (
- HSEShowIE,
HSEProductIE,
-)
-from .genericembeds import (
- HTML5MediaEmbedIE,
- QuotedHTMLIE,
+ HSEShowIE,
)
from .huajiao import HuajiaoIE
-from .huya import HuyaLiveIE
from .huffpost import HuffPostIE
from .hungama import (
+ HungamaAlbumPlaylistIE,
HungamaIE,
HungamaSongIE,
- HungamaAlbumPlaylistIE,
)
+from .huya import HuyaLiveIE
from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE
from .icareus import IcareusIE
from .ichinanalive import (
- IchinanaLiveIE,
IchinanaLiveClipIE,
+ IchinanaLiveIE,
)
from .idolplus import IdolPlusIE
from .ign import (
IGNIE,
- IGNVideoIE,
IGNArticleIE,
+ IGNVideoIE,
)
from .iheart import (
IHeartRadioIE,
@@ -801,12 +848,12 @@ from .ilpost import IlPostIE
from .iltalehti import IltalehtiIE
from .imdb import (
ImdbIE,
- ImdbListIE
+ ImdbListIE,
)
from .imgur import (
- ImgurIE,
ImgurAlbumIE,
ImgurGalleryIE,
+ ImgurIE,
)
from .ina import InaIE
from .inc import IncIE
@@ -815,20 +862,20 @@ from .infoq import InfoQIE
from .instagram import (
InstagramIE,
InstagramIOSIE,
- InstagramUserIE,
- InstagramTagIE,
InstagramStoryIE,
+ InstagramTagIE,
+ InstagramUserIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import (
+ IPrimaCNNIE,
IPrimaIE,
- IPrimaCNNIE
)
from .iqiyi import (
- IqiyiIE,
+ IqAlbumIE,
IqIE,
- IqAlbumIE
+ IqiyiIE,
)
from .islamchannel import (
IslamChannelIE,
@@ -836,16 +883,16 @@ from .islamchannel import (
)
from .israelnationalnews import IsraelNationalNewsIE
from .itprotv import (
+ ITProTVCourseIE,
ITProTVIE,
- ITProTVCourseIE
)
from .itv import (
- ITVIE,
ITVBTCCIE,
+ ITVIE,
)
from .ivi import (
+ IviCompilationIE,
IviIE,
- IviCompilationIE
)
from .ivideon import IvideonIE
from .iwara import (
@@ -855,29 +902,30 @@ from .iwara import (
)
from .ixigua import IxiguaIE
from .izlesene import IzleseneIE
-from .jable import (
- JableIE,
- JablePlaylistIE,
-)
from .jamendo import (
- JamendoIE,
JamendoAlbumIE,
+ JamendoIE,
)
from .japandiet import (
+ SangiinIE,
+ SangiinInstructionIE,
ShugiinItvLiveIE,
ShugiinItvLiveRoomIE,
ShugiinItvVodIE,
- SangiinInstructionIE,
- SangiinIE,
)
from .jeuxvideo import JeuxVideoIE
+from .jiocinema import (
+ JioCinemaIE,
+ JioCinemaSeriesIE,
+)
from .jiosaavn import (
- JioSaavnSongIE,
JioSaavnAlbumIE,
+ JioSaavnPlaylistIE,
+ JioSaavnSongIE,
)
-from .jove import JoveIE
from .joj import JojIE
from .joqrag import JoqrAgIE
+from .jove import JoveIE
from .jstream import JStreamIE
from .jtbc import (
JTBCIE,
@@ -904,17 +952,17 @@ from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE
from .koo import KooIE
-from .kth import KTHIE
from .krasview import KrasViewIE
+from .kth import KTHIE
from .ku6 import Ku6IE
from .kukululive import KukuluLiveIE
from .kuwo import (
- KuwoIE,
KuwoAlbumIE,
- KuwoChartIE,
- KuwoSingerIE,
KuwoCategoryIE,
+ KuwoChartIE,
+ KuwoIE,
KuwoMvIE,
+ KuwoSingerIE,
)
from .la7 import (
LA7IE,
@@ -934,14 +982,14 @@ from .lbry import (
)
from .lci import LCIIE
from .lcp import (
- LcpPlayIE,
LcpIE,
+ LcpPlayIE,
)
from .lecture2go import Lecture2GoIE
from .lecturio import (
- LecturioIE,
LecturioCourseIE,
LecturioDeCourseIE,
+ LecturioIE,
)
from .leeco import (
LeIE,
@@ -958,22 +1006,22 @@ from .lenta import LentaIE
from .libraryofcongress import LibraryOfCongressIE
from .libsyn import LibsynIE
from .lifenews import (
- LifeNewsIE,
LifeEmbedIE,
+ LifeNewsIE,
)
from .likee import (
LikeeIE,
- LikeeUserIE
+ LikeeUserIE,
)
from .limelight import (
- LimelightMediaIE,
LimelightChannelIE,
LimelightChannelListIE,
+ LimelightMediaIE,
)
from .linkedin import (
LinkedInIE,
- LinkedInLearningIE,
LinkedInLearningCourseIE,
+ LinkedInLearningIE,
)
from .liputan6 import Liputan6IE
from .listennotes import ListenNotesIE
@@ -989,22 +1037,24 @@ from .lnkgo import (
LnkGoIE,
LnkIE,
)
+from .loom import (
+ LoomFolderIE,
+ LoomIE,
+)
from .lovehomeporn import LoveHomePornIE
from .lrt import (
LRTVODIE,
- LRTStreamIE
+ LRTStreamIE,
)
from .lsm import (
LSMLREmbedIE,
LSMLTVEmbedIE,
- LSMReplayIE
-)
-from .lumni import (
- LumniIE
+ LSMReplayIE,
)
+from .lumni import LumniIE
from .lynda import (
+ LyndaCourseIE,
LyndaIE,
- LyndaCourseIE
)
from .maariv import MaarivIE
from .magellantv import MagellanTVIE
@@ -1016,13 +1066,13 @@ from .mailru import (
)
from .mainstreaming import MainStreamingIE
from .mangomolo import (
- MangomoloVideoIE,
MangomoloLiveIE,
+ MangomoloVideoIE,
)
from .manoto import (
ManotoTVIE,
- ManotoTVShowIE,
ManotoTVLiveIE,
+ ManotoTVShowIE,
)
from .manyvids import ManyVidsIE
from .maoritv import MaoriTVIE
@@ -1038,13 +1088,14 @@ from .mdr import MDRIE
from .medaltv import MedalTVIE
from .mediaite import MediaiteIE
from .mediaklikk import MediaKlikkIE
+from .medialaan import MedialaanIE
from .mediaset import (
MediasetIE,
MediasetShowIE,
)
from .mediasite import (
- MediasiteIE,
MediasiteCatalogIE,
+ MediasiteIE,
MediasiteNamedCatalogIE,
)
from .mediastream import (
@@ -1054,26 +1105,30 @@ from .mediastream import (
from .mediaworksnz import MediaWorksNZVODIE
from .medici import MediciIE
from .megaphone import MegaphoneIE
+from .megatvcom import (
+ MegaTVComEmbedIE,
+ MegaTVComIE,
+)
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .metacritic import MetacriticIE
from .mgtv import MGTVIE
+from .microsoftembed import MicrosoftEmbedIE
from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import (
- MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE,
+ MicrosoftVirtualAcademyIE,
)
-from .microsoftembed import MicrosoftEmbedIE
from .mildom import (
- MildomIE,
- MildomVodIE,
MildomClipIE,
+ MildomIE,
MildomUserVodIE,
+ MildomVodIE,
)
from .minds import (
- MindsIE,
MindsChannelIE,
MindsGroupIE,
+ MindsIE,
)
from .minoto import MinotoIE
from .mirrativ import (
@@ -1081,31 +1136,34 @@ from .mirrativ import (
MirrativUserIE,
)
from .mirrorcouk import MirrorCoUKIE
-from .mit import TechTVMITIE, OCWMITIE
+from .mit import (
+ OCWMITIE,
+ TechTVMITIE,
+)
from .mitele import MiTeleIE
from .mixch import (
- MixchIE,
MixchArchiveIE,
+ MixchIE,
)
from .mixcloud import (
MixcloudIE,
- MixcloudUserIE,
MixcloudPlaylistIE,
+ MixcloudUserIE,
)
from .mlb import (
MLBIE,
- MLBVideoIE,
MLBTVIE,
MLBArticleIE,
+ MLBVideoIE,
)
from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .motherless import (
- MotherlessIE,
- MotherlessGroupIE,
MotherlessGalleryIE,
+ MotherlessGroupIE,
+ MotherlessIE,
MotherlessUploaderIE,
)
from .motorsport import MotorsportIE
@@ -1115,23 +1173,26 @@ from .moviezine import MoviezineIE
from .movingimage import MovingImageIE
from .msn import MSNIE
from .mtv import (
- MTVIE,
- MTVVideoIE,
- MTVServicesEmbeddedIE,
MTVDEIE,
- MTVJapanIE,
+ MTVIE,
MTVItaliaIE,
MTVItaliaProgrammaIE,
+ MTVJapanIE,
+ MTVServicesEmbeddedIE,
+ MTVVideoIE,
)
from .muenchentv import MuenchenTVIE
-from .murrtube import MurrtubeIE, MurrtubeUserIE
+from .murrtube import (
+ MurrtubeIE,
+ MurrtubeUserIE,
+)
from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (
- MusicdexSongIE,
MusicdexAlbumIE,
MusicdexArtistIE,
MusicdexPlaylistIE,
+ MusicdexSongIE,
)
from .mx3 import (
Mx3IE,
@@ -1142,7 +1203,10 @@ from .mxplayer import (
MxplayerIE,
MxplayerShowIE,
)
-from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspace import (
+ MySpaceAlbumIE,
+ MySpaceIE,
+)
from .myspass import MySpassIE
from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
@@ -1156,8 +1220,8 @@ from .nate import (
NateProgramIE,
)
from .nationalgeographic import (
- NationalGeographicVideoIE,
NationalGeographicTVIE,
+ NationalGeographicVideoIE,
)
from .naver import (
NaverIE,
@@ -1165,12 +1229,12 @@ from .naver import (
NaverNowIE,
)
from .nba import (
- NBAWatchEmbedIE,
- NBAWatchIE,
- NBAWatchCollectionIE,
- NBAEmbedIE,
NBAIE,
NBAChannelIE,
+ NBAEmbedIE,
+ NBAWatchCollectionIE,
+ NBAWatchEmbedIE,
+ NBAWatchIE,
)
from .nbc import (
NBCIE,
@@ -1184,35 +1248,35 @@ from .nbc import (
)
from .ndr import (
NDRIE,
- NJoyIE,
NDREmbedBaseIE,
NDREmbedIE,
NJoyEmbedIE,
+ NJoyIE,
)
from .ndtv import NDTVIE
from .nebula import (
- NebulaIE,
+ NebulaChannelIE,
NebulaClassIE,
+ NebulaIE,
NebulaSubscriptionsIE,
- NebulaChannelIE,
)
from .nekohacker import NekoHackerIE
from .nerdcubed import NerdCubedFeedIE
-from .netzkino import NetzkinoIE
from .neteasemusic import (
- NetEaseMusicIE,
NetEaseMusicAlbumIE,
- NetEaseMusicSingerIE,
+ NetEaseMusicDjRadioIE,
+ NetEaseMusicIE,
NetEaseMusicListIE,
NetEaseMusicMvIE,
NetEaseMusicProgramIE,
- NetEaseMusicDjRadioIE,
+ NetEaseMusicSingerIE,
)
from .netverse import (
NetverseIE,
NetversePlaylistIE,
NetverseSearchIE,
)
+from .netzkino import NetzkinoIE
from .newgrounds import (
NewgroundsIE,
NewgroundsPlaylistIE,
@@ -1221,14 +1285,14 @@ from .newgrounds import (
from .newspicks import NewsPicksIE
from .newsy import NewsyIE
from .nextmedia import (
- NextMediaIE,
- NextMediaActionNewsIE,
AppleDailyIE,
+ NextMediaActionNewsIE,
+ NextMediaIE,
NextTVIE,
)
from .nexx import (
- NexxIE,
NexxEmbedIE,
+ NexxIE,
)
from .nfb import (
NFBIE,
@@ -1242,43 +1306,43 @@ from .nfl import (
NFLPlusReplayIE,
)
from .nhk import (
- NhkVodIE,
- NhkVodProgramIE,
NhkForSchoolBangumiIE,
- NhkForSchoolSubjectIE,
NhkForSchoolProgramListIE,
+ NhkForSchoolSubjectIE,
NhkRadioNewsPageIE,
NhkRadiruIE,
NhkRadiruLiveIE,
+ NhkVodIE,
+ NhkVodProgramIE,
)
from .nhl import NHLIE
from .nick import (
- NickIE,
NickBrIE,
NickDeIE,
+ NickIE,
NickRuIE,
)
from .niconico import (
+ NiconicoHistoryIE,
NiconicoIE,
+ NiconicoLiveIE,
NiconicoPlaylistIE,
- NiconicoUserIE,
NiconicoSeriesIE,
- NiconicoHistoryIE,
+ NiconicoUserIE,
NicovideoSearchDateIE,
NicovideoSearchIE,
NicovideoSearchURLIE,
NicovideoTagURLIE,
- NiconicoLiveIE,
+)
+from .niconicochannelplus import (
+ NiconicoChannelPlusChannelLivesIE,
+ NiconicoChannelPlusChannelVideosIE,
+ NiconicoChannelPlusIE,
)
from .ninaprotocol import NinaProtocolIE
from .ninecninemedia import (
- NineCNineMediaIE,
CPTwentyFourIE,
-)
-from .niconicochannelplus import (
- NiconicoChannelPlusIE,
- NiconicoChannelPlusChannelVideosIE,
- NiconicoChannelPlusChannelLivesIE,
+ NineCNineMediaIE,
)
from .ninegag import NineGagIE
from .ninenews import NineNewsIE
@@ -1303,46 +1367,47 @@ from .nowness import (
)
from .noz import NozIE
from .npo import (
- AndereTijdenIE,
NPOIE,
+ VPROIE,
+ WNLIE,
+ AndereTijdenIE,
+ HetKlokhuisIE,
NPOLiveIE,
- NPORadioIE,
NPORadioFragmentIE,
+ NPORadioIE,
SchoolTVIE,
- HetKlokhuisIE,
- VPROIE,
- WNLIE,
)
from .npr import NprIE
from .nrk import (
NRKIE,
+ NRKTVIE,
NRKPlaylistIE,
+ NRKRadioPodkastIE,
NRKSkoleIE,
- NRKTVIE,
NRKTVDirekteIE,
- NRKRadioPodkastIE,
NRKTVEpisodeIE,
NRKTVEpisodesIE,
NRKTVSeasonIE,
NRKTVSeriesIE,
)
from .nrl import NRLTVIE
+from .nts import NTSLiveIE
from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nubilesporn import NubilesPornIE
-from .nytimes import (
- NYTimesIE,
- NYTimesArticleIE,
- NYTimesCookingIE,
- NYTimesCookingRecipeIE,
-)
from .nuum import (
NuumLiveIE,
- NuumTabIE,
NuumMediaIE,
+ NuumTabIE,
)
from .nuvid import NuvidIE
+from .nytimes import (
+ NYTimesArticleIE,
+ NYTimesCookingIE,
+ NYTimesCookingRecipeIE,
+ NYTimesIE,
+)
from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE
@@ -1350,7 +1415,7 @@ from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE
from .oftv import (
OfTVIE,
- OfTVPlaylistIE
+ OfTVPlaylistIE,
)
from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE
@@ -1363,8 +1428,8 @@ from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE
from .onet import (
- OnetIE,
OnetChannelIE,
+ OnetIE,
OnetMVPIE,
OnetPlIE,
)
@@ -1374,34 +1439,33 @@ from .opencast import (
OpencastPlaylistIE,
)
from .openrec import (
- OpenRecIE,
OpenRecCaptureIE,
+ OpenRecIE,
OpenRecMovieIE,
)
from .ora import OraTVIE
from .orf import (
- ORFTVthekIE,
- ORFFM4StoryIE,
+ ORFIPTVIE,
ORFONIE,
- ORFRadioIE,
+ ORFFM4StoryIE,
ORFPodcastIE,
- ORFIPTVIE,
+ ORFRadioIE,
)
from .outsidetv import OutsideTVIE
from .owncloud import OwnCloudIE
from .packtpub import (
- PacktPubIE,
PacktPubCourseIE,
+ PacktPubIE,
)
from .palcomp3 import (
- PalcoMP3IE,
PalcoMP3ArtistIE,
+ PalcoMP3IE,
PalcoMP3VideoIE,
)
from .panopto import (
PanoptoIE,
PanoptoListIE,
- PanoptoPlaylistIE
+ PanoptoPlaylistIE,
)
from .paramountplus import (
ParamountPlusIE,
@@ -1410,12 +1474,18 @@ from .paramountplus import (
from .parler import ParlerIE
from .parlview import ParlviewIE
from .patreon import (
+ PatreonCampaignIE,
PatreonIE,
- PatreonCampaignIE
)
-from .pbs import PBSIE, PBSKidsIE
+from .pbs import (
+ PBSIE,
+ PBSKidsIE,
+)
from .pearvideo import PearVideoIE
-from .peekvids import PeekVidsIE, PlayVidsIE
+from .peekvids import (
+ PeekVidsIE,
+ PlayVidsIE,
+)
from .peertube import (
PeerTubeIE,
PeerTubePlaylistIE,
@@ -1423,7 +1493,7 @@ from .peertube import (
from .peertv import PeerTVIE
from .peloton import (
PelotonIE,
- PelotonLiveIE
+ PelotonLiveIE,
)
from .performgroup import PerformGroupIE
from .periscope import (
@@ -1443,8 +1513,8 @@ from .picarto import (
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pinterest import (
- PinterestIE,
PinterestCollectionIE,
+ PinterestIE,
)
from .pixivsketch import (
PixivSketchIE,
@@ -1453,19 +1523,22 @@ from .pixivsketch import (
from .pladform import PladformIE
from .planetmarathi import PlanetMarathiIE
from .platzi import (
- PlatziIE,
PlatziCourseIE,
+ PlatziIE,
)
from .playplustv import PlayPlusTVIE
from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE
from .playwire import PlaywireIE
-from .plutotv import PlutoTVIE
from .pluralsight import (
- PluralsightIE,
PluralsightCourseIE,
+ PluralsightIE,
+)
+from .plutotv import PlutoTVIE
+from .podbayfm import (
+ PodbayFMChannelIE,
+ PodbayFMIE,
)
-from .podbayfm import PodbayFMIE, PodbayFMChannelIE
from .podchaser import PodchaserIE
from .podomatic import PodomaticIE
from .pokemon import (
@@ -1473,77 +1546,81 @@ from .pokemon import (
PokemonWatchIE,
)
from .pokergo import (
- PokerGoIE,
PokerGoCollectionIE,
+ PokerGoIE,
)
from .polsatgo import PolsatGoIE
from .polskieradio import (
- PolskieRadioIE,
- PolskieRadioLegacyIE,
PolskieRadioAuditionIE,
PolskieRadioCategoryIE,
+ PolskieRadioIE,
+ PolskieRadioLegacyIE,
PolskieRadioPlayerIE,
PolskieRadioPodcastIE,
PolskieRadioPodcastListIE,
)
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
from .pornbox import PornboxIE
from .pornflip import PornFlipIE
from .pornhub import (
PornHubIE,
- PornHubUserIE,
- PornHubPlaylistIE,
PornHubPagedVideoListIE,
+ PornHubPlaylistIE,
+ PornHubUserIE,
PornHubUserVideosUploadIE,
)
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
-from .puhutv import (
- PuhuTVIE,
- PuhuTVSerieIE,
-)
from .pr0gramm import Pr0grammIE
-from .prankcast import PrankCastIE, PrankCastPostIE
+from .prankcast import (
+ PrankCastIE,
+ PrankCastPostIE,
+)
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE
from .prosiebensat1 import ProSiebenSat1IE
from .prx import (
- PRXStoryIE,
- PRXSeriesIE,
PRXAccountIE,
+ PRXSeriesIE,
+ PRXSeriesSearchIE,
PRXStoriesSearchIE,
- PRXSeriesSearchIE
+ PRXStoryIE,
+)
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE
from .qqmusic import (
+ QQMusicAlbumIE,
QQMusicIE,
+ QQMusicPlaylistIE,
QQMusicSingerIE,
- QQMusicAlbumIE,
QQMusicToplistIE,
- QQMusicPlaylistIE,
)
from .r7 import (
R7IE,
R7ArticleIE,
)
-from .radiko import RadikoIE, RadikoRadioIE
+from .radiko import (
+ RadikoIE,
+ RadikoRadioIE,
+)
from .radiocanada import (
- RadioCanadaIE,
RadioCanadaAudioVideoIE,
+ RadioCanadaIE,
)
from .radiocomercial import (
RadioComercialIE,
RadioComercialPlaylistIE,
)
from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
from .radiofrance import (
FranceCultureIE,
RadioFranceIE,
@@ -1552,35 +1629,36 @@ from .radiofrance import (
RadioFranceProfileIE,
RadioFranceProgramScheduleIE,
)
-from .radiozet import RadioZetPodcastIE
+from .radiojavan import RadioJavanIE
from .radiokapital import (
RadioKapitalIE,
RadioKapitalShowIE,
)
+from .radiozet import RadioZetPodcastIE
from .radlive import (
- RadLiveIE,
RadLiveChannelIE,
+ RadLiveIE,
RadLiveSeasonIE,
)
from .rai import (
- RaiIE,
RaiCulturaIE,
+ RaiIE,
+ RaiNewsIE,
RaiPlayIE,
RaiPlayLiveIE,
RaiPlayPlaylistIE,
RaiPlaySoundIE,
RaiPlaySoundLiveIE,
RaiPlaySoundPlaylistIE,
- RaiNewsIE,
RaiSudtirolIE,
)
from .raywenderlich import (
- RayWenderlichIE,
RayWenderlichCourseIE,
+ RayWenderlichIE,
)
from .rbgtum import (
- RbgTumIE,
RbgTumCourseIE,
+ RbgTumIE,
RbgTumNewCourseIE,
)
from .rcs import (
@@ -1594,12 +1672,15 @@ from .rcti import (
RCTIPlusTVIE,
)
from .rds import RDSIE
-from .redbee import ParliamentLiveUKIE, RTBFIE
+from .redbee import (
+ RTBFIE,
+ ParliamentLiveUKIE,
+)
from .redbulltv import (
- RedBullTVIE,
RedBullEmbedIE,
- RedBullTVRrnContentIE,
RedBullIE,
+ RedBullTVIE,
+ RedBullTVRrnContentIE,
)
from .reddit import RedditIE
from .redge import RedCDNLivxIE
@@ -1619,107 +1700,100 @@ from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .ridehome import RideHomeIE
from .rinsefm import (
- RinseFMIE,
RinseFMArtistPlaylistIE,
+ RinseFMIE,
)
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
- RokfinIE,
- RokfinStackIE,
RokfinChannelIE,
+ RokfinIE,
RokfinSearchIE,
+ RokfinStackIE,
+)
+from .roosterteeth import (
+ RoosterTeethIE,
+ RoosterTeethSeriesIE,
)
-from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
from .rottentomatoes import RottenTomatoesIE
from .rozhlas import (
+ MujRozhlasIE,
RozhlasIE,
RozhlasVltavaIE,
- MujRozhlasIE,
)
-from .rte import RteIE, RteRadioIE
+from .rte import (
+ RteIE,
+ RteRadioIE,
+)
+from .rtl2 import RTL2IE
from .rtlnl import (
- RtlNlIE,
- RTLLuTeleVODIE,
RTLLuArticleIE,
RTLLuLiveIE,
RTLLuRadioIE,
+ RTLLuTeleVODIE,
+ RtlNlIE,
)
-from .rtl2 import RTL2IE
from .rtnews import (
- RTNewsIE,
RTDocumentryIE,
RTDocumentryPlaylistIE,
+ RTNewsIE,
RuptlyIE,
)
from .rtp import RTPIE
from .rtrfm import RTRFMIE
from .rts import RTSIE
from .rtvcplay import (
- RTVCPlayIE,
- RTVCPlayEmbedIE,
RTVCKalturaIE,
+ RTVCPlayEmbedIE,
+ RTVCPlayIE,
)
from .rtve import (
RTVEALaCartaIE,
RTVEAudioIE,
- RTVELiveIE,
RTVEInfantilIE,
+ RTVELiveIE,
RTVETelevisionIE,
)
from .rtvs import RTVSIE
from .rtvslo import RTVSLOIE
+from .rudovideo import RudoVideoIE
from .rule34video import Rule34VideoIE
from .rumble import (
+ RumbleChannelIE,
RumbleEmbedIE,
RumbleIE,
- RumbleChannelIE,
)
-from .rudovideo import RudoVideoIE
from .rutube import (
- RutubeIE,
RutubeChannelIE,
RutubeEmbedIE,
+ RutubeIE,
RutubeMovieIE,
RutubePersonIE,
RutubePlaylistIE,
RutubeTagsIE,
)
-from .glomex import (
- GlomexIE,
- GlomexEmbedIE,
-)
-from .megatvcom import (
- MegaTVComIE,
- MegaTVComEmbedIE,
-)
-from .antenna import (
- AntennaGrWatchIE,
- Ant1NewsGrArticleIE,
- Ant1NewsGrEmbedIE,
-)
from .rutv import RUTVIE
from .ruutu import RuutuIE
from .ruv import (
RuvIE,
- RuvSpilaIE
+ RuvSpilaIE,
)
from .s4c import (
S4CIE,
- S4CSeriesIE
+ S4CSeriesIE,
)
from .safari import (
- SafariIE,
SafariApiIE,
SafariCourseIE,
+ SafariIE,
)
from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE
from .sapo import SapoIE
from .sbs import SBSIE
from .sbscokr import (
- SBSCoKrIE,
SBSCoKrAllvodProgramIE,
+ SBSCoKrIE,
SBSCoKrProgramsVodIE,
)
from .screen9 import Screen9IE
@@ -1727,62 +1801,66 @@ from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE
from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import (
- ScrippsNetworksWatchIE,
ScrippsNetworksIE,
+ ScrippsNetworksWatchIE,
)
+from .scrolller import ScrolllerIE
from .scte import (
SCTEIE,
SCTECourseIE,
)
-from .scrolller import ScrolllerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
-from .senategov import SenateISVPIE, SenateGovIE
+from .senategov import (
+ SenateGovIE,
+ SenateISVPIE,
+)
from .sendtonews import SendtoNewsIE
from .servus import ServusIE
from .sevenplus import SevenPlusIE
from .sexu import SexuIE
from .seznamzpravy import (
- SeznamZpravyIE,
SeznamZpravyArticleIE,
+ SeznamZpravyIE,
)
from .shahid import (
ShahidIE,
ShahidShowIE,
)
+from .sharepoint import SharePointIE
from .sharevideos import ShareVideosEmbedIE
-from .sibnet import SibnetEmbedIE
from .shemaroome import ShemarooMeIE
from .showroomlive import ShowRoomLiveIE
+from .sibnet import SibnetEmbedIE
from .simplecast import (
- SimplecastIE,
SimplecastEpisodeIE,
+ SimplecastIE,
SimplecastPodcastIE,
)
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skeb import SkebIE
+from .sky import (
+ SkyNewsIE,
+ SkyNewsStoryIE,
+ SkySportsIE,
+ SkySportsNewsIE,
+)
from .skyit import (
+ CieloTVItIE,
+ SkyItArteIE,
+ SkyItIE,
SkyItPlayerIE,
SkyItVideoIE,
SkyItVideoLiveIE,
- SkyItIE,
- SkyItArteIE,
- CieloTVItIE,
TV8ItIE,
)
from .skylinewebcams import SkylineWebcamsIE
from .skynewsarabia import (
- SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
+ SkyNewsArabiaIE,
)
from .skynewsau import SkyNewsAUIE
-from .sky import (
- SkyNewsIE,
- SkyNewsStoryIE,
- SkySportsIE,
- SkySportsNewsIE,
-)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
@@ -1799,29 +1877,29 @@ from .sonyliv import (
from .soundcloud import (
SoundcloudEmbedIE,
SoundcloudIE,
- SoundcloudSetIE,
+ SoundcloudPlaylistIE,
SoundcloudRelatedIE,
+ SoundcloudSearchIE,
+ SoundcloudSetIE,
+ SoundcloudTrackStationIE,
SoundcloudUserIE,
SoundcloudUserPermalinkIE,
- SoundcloudTrackStationIE,
- SoundcloudPlaylistIE,
- SoundcloudSearchIE,
)
from .soundgasm import (
SoundgasmIE,
- SoundgasmProfileIE
+ SoundgasmProfileIE,
)
from .southpark import (
- SouthParkIE,
SouthParkDeIE,
SouthParkDkIE,
SouthParkEsIE,
+ SouthParkIE,
SouthParkLatIE,
- SouthParkNlIE
+ SouthParkNlIE,
)
from .sovietscloset import (
SovietsClosetIE,
- SovietsClosetPlaylistIE
+ SovietsClosetPlaylistIE,
)
from .spankbang import (
SpankBangIE,
@@ -1832,12 +1910,6 @@ from .spike import (
BellatorIE,
ParamountNetworkIE,
)
-from .stageplus import StagePlusVODConcertIE
-from .startrek import StarTrekIE
-from .stitcher import (
- StitcherIE,
- StitcherShowIE,
-)
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
@@ -1861,19 +1933,25 @@ from .srmediathek import SRMediathekIE
from .stacommu import (
StacommuLiveIE,
StacommuVODIE,
- TheaterComplexTownVODIE,
TheaterComplexTownPPVIE,
+ TheaterComplexTownVODIE,
)
+from .stageplus import StagePlusVODConcertIE
from .stanfordoc import StanfordOpenClassroomIE
+from .startrek import StarTrekIE
from .startv import StarTVIE
from .steam import (
- SteamIE,
SteamCommunityBroadcastIE,
+ SteamIE,
+)
+from .stitcher import (
+ StitcherIE,
+ StitcherShowIE,
)
from .storyfire import (
StoryFireIE,
- StoryFireUserIE,
StoryFireSeriesIE,
+ StoryFireUserIE,
)
from .streamable import StreamableIE
from .streamcz import StreamCZIE
@@ -1894,20 +1972,26 @@ from .svt import (
SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
-from .syvdk import SYVDKIE
from .syfy import SyfyIE
+from .syvdk import SYVDKIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
+from .taptap import (
+ TapTapAppIE,
+ TapTapAppIntlIE,
+ TapTapMomentIE,
+ TapTapPostIntlIE,
+)
from .tass import TassIE
from .tbs import TBSIE
from .tbsjp import (
TBSJPEpisodeIE,
- TBSJPProgramIE,
TBSJPPlaylistIE,
+ TBSJPProgramIE,
)
from .teachable import (
- TeachableIE,
TeachableCourseIE,
+ TeachableIE,
)
from .teachertube import (
TeacherTubeIE,
@@ -1915,8 +1999,8 @@ from .teachertube import (
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import (
- TeamcocoIE,
ConanClassicIE,
+ TeamcocoIE,
)
from .teamtreehouse import TeamTreeHouseIE
from .ted import (
@@ -1935,15 +2019,18 @@ from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (
- TeleQuebecIE,
- TeleQuebecSquatIE,
TeleQuebecEmissionIE,
+ TeleQuebecIE,
TeleQuebecLiveIE,
+ TeleQuebecSquatIE,
TeleQuebecVideoIE,
)
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
-from .tempo import TempoIE, IVXPlayerIE
+from .tempo import (
+ IVXPlayerIE,
+ TempoIE,
+)
from .tencent import (
IflixEpisodeIE,
IflixSeriesIE,
@@ -1967,8 +2054,8 @@ from .theguardian import (
from .theholetv import TheHoleTvIE
from .theintercept import TheInterceptIE
from .theplatform import (
- ThePlatformIE,
ThePlatformFeedIE,
+ ThePlatformIE,
)
from .thestar import TheStarIE
from .thesun import TheSunIE
@@ -1980,50 +2067,52 @@ from .thisvid import (
ThisVidMemberIE,
ThisVidPlaylistIE,
)
+from .threeqsdn import ThreeQSDNIE
from .threespeak import (
ThreeSpeakIE,
ThreeSpeakUserIE,
)
-from .threeqsdn import ThreeQSDNIE
from .tiktok import (
+ DouyinIE,
+ TikTokCollectionIE,
+ TikTokEffectIE,
TikTokIE,
- TikTokUserIE,
+ TikTokLiveIE,
TikTokSoundIE,
- TikTokEffectIE,
TikTokTagIE,
+ TikTokUserIE,
TikTokVMIE,
- TikTokLiveIE,
- DouyinIE,
)
from .tmz import TMZIE
from .tnaflix import (
- TNAFlixNetworkEmbedIE,
- TNAFlixIE,
EMPFlixIE,
MovieFapIE,
+ TNAFlixIE,
+ TNAFlixNetworkEmbedIE,
)
from .toggle import (
- ToggleIE,
MeWatchIE,
+ ToggleIE,
)
-from .toggo import (
- ToggoIE,
-)
+from .toggo import ToggoIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
+from .toypics import (
+ ToypicsIE,
+ ToypicsUserIE,
+)
from .traileraddict import TrailerAddictIE
from .triller import (
TrillerIE,
- TrillerUserIE,
TrillerShortIE,
+ TrillerUserIE,
)
from .trovo import (
+ TrovoChannelClipIE,
+ TrovoChannelVodIE,
TrovoIE,
TrovoVodIE,
- TrovoChannelVodIE,
- TrovoChannelClipIE,
)
from .trtcocuk import TrtCocukVideoIE
from .trtworld import TrtWorldIE
@@ -2032,26 +2121,26 @@ from .trunews import TruNewsIE
from .truth import TruthIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
-from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE
+from .tubetugraz import (
+ TubeTuGrazIE,
+ TubeTuGrazSeriesIE,
+)
from .tubitv import (
TubiTvIE,
TubiTvShowIE,
)
from .tumblr import TumblrIE
from .tunein import (
- TuneInStationIE,
- TuneInPodcastIE,
TuneInPodcastEpisodeIE,
+ TuneInPodcastIE,
TuneInShortenerIE,
+ TuneInStationIE,
)
from .tv2 import (
TV2IE,
- TV2ArticleIE,
KatsomoIE,
MTVUutisetArticleIE,
-)
-from .tv24ua import (
- TV24UAVideoIE,
+ TV2ArticleIE,
)
from .tv2dk import (
TV2DKIE,
@@ -2064,16 +2153,17 @@ from .tv2hu import (
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tv5unis import (
- TV5UnisVideoIE,
TV5UnisIE,
+ TV5UnisVideoIE,
)
+from .tv24ua import TV24UAVideoIE
from .tva import (
TVAIE,
QubIE,
)
from .tvanouvelles import (
- TVANouvellesIE,
TVANouvellesArticleIE,
+ TVANouvellesIE,
)
from .tvc import (
TVCIE,
@@ -2086,19 +2176,19 @@ from .tvland import TVLandIE
from .tvn24 import TVN24IE
from .tvnoe import TVNoeIE
from .tvopengr import (
- TVOpenGrWatchIE,
TVOpenGrEmbedIE,
+ TVOpenGrWatchIE,
)
from .tvp import (
- TVPEmbedIE,
TVPIE,
+ TVPEmbedIE,
TVPStreamIE,
TVPVODSeriesIE,
TVPVODVideoIE,
)
from .tvplay import (
- TVPlayIE,
TVPlayHomeIE,
+ TVPlayIE,
)
from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
@@ -2110,29 +2200,29 @@ from .twitcasting import (
TwitCastingUserIE,
)
from .twitch import (
- TwitchVodIE,
+ TwitchClipsIE,
TwitchCollectionIE,
- TwitchVideosIE,
+ TwitchStreamIE,
TwitchVideosClipsIE,
TwitchVideosCollectionsIE,
- TwitchStreamIE,
- TwitchClipsIE,
+ TwitchVideosIE,
+ TwitchVodIE,
)
from .twitter import (
- TwitterCardIE,
- TwitterIE,
TwitterAmplifyIE,
TwitterBroadcastIE,
- TwitterSpacesIE,
+ TwitterCardIE,
+ TwitterIE,
TwitterShortenerIE,
+ TwitterSpacesIE,
)
from .txxx import (
- TxxxIE,
PornTopIE,
+ TxxxIE,
)
from .udemy import (
+ UdemyCourseIE,
UdemyIE,
- UdemyCourseIE
)
from .udn import UDNEmbedIE
from .ufctv import (
@@ -2141,16 +2231,13 @@ from .ufctv import (
)
from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE
-from .digiteka import DigitekaIE
-from .dlive import (
- DLiveVODIE,
- DLiveStreamIE,
-)
-from .drooble import DroobleIE
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
-from .unsupported import KnownDRMIE, KnownPiracyIE
+from .unsupported import (
+ KnownDRMIE,
+ KnownPiracyIE,
+)
from .uol import UOLIE
from .uplynk import (
UplynkIE,
@@ -2160,10 +2247,13 @@ from .urort import UrortIE
from .urplay import URPlayIE
from .usanetwork import USANetworkIE
from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
+from .ustream import (
+ UstreamChannelIE,
+ UstreamIE,
+)
from .ustudio import (
- UstudioIE,
UstudioEmbedIE,
+ UstudioIE,
)
from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE
@@ -2171,7 +2261,7 @@ from .vbox7 import Vbox7IE
from .veo import VeoIE
from .veoh import (
VeohIE,
- VeohUserIE
+ VeohUserIE,
)
from .vesti import VestiIE
from .vevo import (
@@ -2179,14 +2269,14 @@ from .vevo import (
VevoPlaylistIE,
)
from .vgtv import (
+ VGTVIE,
BTArticleIE,
BTVestlendingenIE,
- VGTVIE,
)
from .vh1 import VH1IE
from .vice import (
- ViceIE,
ViceArticleIE,
+ ViceIE,
ViceShowIE,
)
from .viddler import ViddlerIE
@@ -2198,42 +2288,46 @@ from .videocampus_sachsen import (
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videoken import (
+ VideoKenCategoryIE,
VideoKenIE,
VideoKenPlayerIE,
VideoKenPlaylistIE,
- VideoKenCategoryIE,
VideoKenTopicIE,
)
from .videomore import (
VideomoreIE,
- VideomoreVideoIE,
VideomoreSeasonIE,
+ VideomoreVideoIE,
)
from .videopress import VideoPressIE
from .vidio import (
VidioIE,
+ VidioLiveIE,
VidioPremierIE,
- VidioLiveIE
)
from .vidlii import VidLiiIE
from .vidly import VidlyIE
from .viewlift import (
- ViewLiftIE,
ViewLiftEmbedIE,
+ ViewLiftIE,
)
from .viidea import ViideaIE
+from .viki import (
+ VikiChannelIE,
+ VikiIE,
+)
from .vimeo import (
- VimeoIE,
+ VHXEmbedIE,
VimeoAlbumIE,
VimeoChannelIE,
VimeoGroupsIE,
+ VimeoIE,
VimeoLikesIE,
VimeoOndemandIE,
VimeoProIE,
VimeoReviewIE,
VimeoUserIE,
VimeoWatchLaterIE,
- VHXEmbedIE,
)
from .vimm import (
VimmIE,
@@ -2243,49 +2337,41 @@ from .vine import (
VineIE,
VineUserIE,
)
-from .viki import (
- VikiIE,
- VikiChannelIE,
-)
from .viously import ViouslyIE
from .viqeo import ViqeoIE
from .viu import (
ViuIE,
- ViuPlaylistIE,
ViuOTTIE,
ViuOTTIndonesiaIE,
+ ViuPlaylistIE,
)
from .vk import (
VKIE,
- VKUserVideosIE,
- VKWallPostIE,
VKPlayIE,
VKPlayLiveIE,
+ VKUserVideosIE,
+ VKWallPostIE,
)
from .vocaroo import VocarooIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicy import (
- VoicyIE,
VoicyChannelIE,
+ VoicyIE,
)
from .volejtv import VolejTVIE
-from .voot import (
- VootIE,
- VootSeriesIE,
-)
from .voxmedia import (
- VoxMediaVolumeIE,
VoxMediaIE,
+ VoxMediaVolumeIE,
)
from .vrt import (
VRTIE,
- VrtNUIE,
- KetnetIE,
DagelijkseKostIE,
+ KetnetIE,
+ Radio1BeIE,
+ VrtNUIE,
)
from .vtm import VTMIE
-from .medialaan import MedialaanIE
from .vuclip import VuClipIE
from .vvvvid import (
VVVVIDIE,
@@ -2293,20 +2379,20 @@ from .vvvvid import (
)
from .walla import WallaIE
from .washingtonpost import (
- WashingtonPostIE,
WashingtonPostArticleIE,
+ WashingtonPostIE,
)
from .wat import WatIE
from .wdr import (
WDRIE,
- WDRPageIE,
WDRElefantIE,
WDRMobileIE,
+ WDRPageIE,
)
from .webcamerapl import WebcameraplIE
from .webcaster import (
- WebcasterIE,
WebcasterFeedIE,
+ WebcasterIE,
)
from .webofstories import (
WebOfStoriesIE,
@@ -2314,42 +2400,42 @@ from .webofstories import (
)
from .weibo import (
WeiboIE,
- WeiboVideoIE,
WeiboUserIE,
+ WeiboVideoIE,
)
from .weiqitv import WeiqiTVIE
from .weverse import (
WeverseIE,
- WeverseMediaIE,
- WeverseMomentIE,
+ WeverseLiveIE,
WeverseLiveTabIE,
+ WeverseMediaIE,
WeverseMediaTabIE,
- WeverseLiveIE,
+ WeverseMomentIE,
)
from .wevidi import WeVidiIE
from .weyyak import WeyyakIE
+from .whowatch import WhoWatchIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE
from .wimbledon import WimbledonIE
from .wimtv import WimTVIE
-from .whowatch import WhoWatchIE
from .wistia import (
+ WistiaChannelIE,
WistiaIE,
WistiaPlaylistIE,
- WistiaChannelIE,
)
from .wordpress import (
- WordpressPlaylistEmbedIE,
WordpressMiniAudioPlayerEmbedIE,
+ WordpressPlaylistEmbedIE,
)
from .worldstarhiphop import WorldStarHipHopIE
from .wppilot import (
- WPPilotIE,
WPPilotChannelsIE,
+ WPPilotIE,
)
from .wrestleuniverse import (
- WrestleUniverseVODIE,
WrestleUniversePPVIE,
+ WrestleUniverseVODIE,
)
from .wsj import (
WSJIE,
@@ -2357,22 +2443,22 @@ from .wsj import (
)
from .wwe import WWEIE
from .wykop import (
- WykopDigIE,
WykopDigCommentIE,
- WykopPostIE,
+ WykopDigIE,
WykopPostCommentIE,
+ WykopPostIE,
)
from .xanimu import XanimuIE
from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
from .xhamster import (
- XHamsterIE,
XHamsterEmbedIE,
+ XHamsterIE,
XHamsterUserIE,
)
+from .xiaohongshu import XiaoHongShuIE
from .ximalaya import (
+ XimalayaAlbumIE,
XimalayaIE,
- XimalayaAlbumIE
)
from .xinpianchang import XinpianchangIE
from .xminus import XMinusIE
@@ -2380,27 +2466,27 @@ from .xnxx import XNXXIE
from .xstream import XstreamIE
from .xvideos import (
XVideosIE,
- XVideosQuickiesIE
+ XVideosQuickiesIE,
)
from .xxxymovies import XXXYMoviesIE
from .yahoo import (
YahooIE,
- YahooSearchIE,
YahooJapanNewsIE,
+ YahooSearchIE,
)
from .yandexdisk import YandexDiskIE
from .yandexmusic import (
- YandexMusicTrackIE,
YandexMusicAlbumIE,
- YandexMusicPlaylistIE,
- YandexMusicArtistTracksIE,
YandexMusicArtistAlbumsIE,
+ YandexMusicArtistTracksIE,
+ YandexMusicPlaylistIE,
+ YandexMusicTrackIE,
)
from .yandexvideo import (
YandexVideoIE,
YandexVideoPreviewIE,
- ZenYandexIE,
ZenYandexChannelIE,
+ ZenYandexIE,
)
from .yapfiles import YapFilesIE
from .yappy import (
@@ -2414,26 +2500,34 @@ from .youku import (
YoukuShowIE,
)
from .younow import (
- YouNowLiveIE,
YouNowChannelIE,
+ YouNowLiveIE,
YouNowMomentIE,
)
-from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
+from .youporn import (
+ YouPornCategoryIE,
+ YouPornChannelIE,
+ YouPornCollectionIE,
+ YouPornIE,
+ YouPornStarIE,
+ YouPornTagIE,
+ YouPornVideosIE,
+)
from .zaiko import (
- ZaikoIE,
ZaikoETicketIE,
+ ZaikoIE,
)
from .zapiks import ZapiksIE
from .zattoo import (
BBVTVIE,
+ EWETVIE,
+ SAKTVIE,
+ VTXTVIE,
BBVTVLiveIE,
BBVTVRecordingsIE,
EinsUndEinsTVIE,
EinsUndEinsTVLiveIE,
EinsUndEinsTVRecordingsIE,
- EWETVIE,
EWETVLiveIE,
EWETVRecordingsIE,
GlattvisionTVIE,
@@ -2451,13 +2545,11 @@ from .zattoo import (
QuantumTVIE,
QuantumTVLiveIE,
QuantumTVRecordingsIE,
+ SAKTVLiveIE,
+ SAKTVRecordingsIE,
SaltTVIE,
SaltTVLiveIE,
SaltTVRecordingsIE,
- SAKTVIE,
- SAKTVLiveIE,
- SAKTVRecordingsIE,
- VTXTVIE,
VTXTVLiveIE,
VTXTVRecordingsIE,
WalyTVIE,
@@ -2468,7 +2560,10 @@ from .zattoo import (
ZattooMoviesIE,
ZattooRecordingsIE,
)
-from .zdf import ZDFIE, ZDFChannelIE
+from .zdf import (
+ ZDFIE,
+ ZDFChannelIE,
+)
from .zee5 import (
Zee5IE,
Zee5SeriesIE,
@@ -2478,16 +2573,16 @@ from .zenporn import ZenPornIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (
- ZingMp3IE,
ZingMp3AlbumIE,
ZingMp3ChartHomeIE,
- ZingMp3WeekChartIE,
ZingMp3ChartMusicVideoIE,
- ZingMp3UserIE,
ZingMp3HubIE,
+ ZingMp3IE,
ZingMp3LiveRadioIE,
ZingMp3PodcastEpisodeIE,
ZingMp3PodcastIE,
+ ZingMp3UserIE,
+ ZingMp3WeekChartIE,
)
from .zoom import ZoomIE
from .zype import ZypeIE
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index b217422..2c0d296 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -6,10 +6,10 @@ import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- dict_get,
ExtractorError,
- js_to_json,
+ dict_get,
int_or_none,
+ js_to_json,
parse_iso8601,
str_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index fee7375..b8c79b9 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -12,20 +12,21 @@ import urllib.parse
import urllib.request
import urllib.response
import uuid
-from ..utils.networking import clean_proxies
+
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
ExtractorError,
+ OnDemandPagedList,
bytes_to_intlist,
decode_base_n,
int_or_none,
intlist_to_bytes,
- OnDemandPagedList,
time_seconds,
traverse_obj,
update_url_query,
)
+from ..utils.networking import clean_proxies
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py
index c3b4f43..0793319 100644
--- a/yt_dlp/extractor/acfun.py
+++ b/yt_dlp/extractor/acfun.py
@@ -3,10 +3,10 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
- str_or_none,
- traverse_obj,
parse_codecs,
parse_qs,
+ str_or_none,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py
index 898d372..2f3b67d 100644
--- a/yt_dlp/extractor/adn.py
+++ b/yt_dlp/extractor/adn.py
@@ -10,18 +10,18 @@ from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long,
- ExtractorError,
float_or_none,
int_or_none,
intlist_to_bytes,
long_to_bytes,
parse_iso8601,
pkcs1pad,
- strip_or_none,
str_or_none,
+ strip_or_none,
try_get,
unified_strdate,
urlencode_postdata,
diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py
index d1525a1..08e9e51 100644
--- a/yt_dlp/extractor/adobetv.py
+++ b/yt_dlp/extractor/adobetv.py
@@ -4,11 +4,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ISO639Utils,
+ OnDemandPagedList,
float_or_none,
int_or_none,
- ISO639Utils,
join_nonempty,
- OnDemandPagedList,
parse_duration,
str_or_none,
str_to_int,
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index 3d26d9c..3e5738f 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -1,25 +1,65 @@
import functools
-import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
- date_from_str,
+ UserNotLive,
determine_ext,
+ filter_dict,
int_or_none,
- qualities,
- traverse_obj,
- unified_strdate,
+ orderedSet,
unified_timestamp,
- update_url_query,
url_or_none,
urlencode_postdata,
- xpath_text,
+ urljoin,
)
+from ..utils.traversal import traverse_obj
-class AfreecaTVIE(InfoExtractor):
+class AfreecaTVBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'afreecatv'
+
+ def _perform_login(self, username, password):
+ login_form = {
+ 'szWork': 'login',
+ 'szType': 'json',
+ 'szUid': username,
+ 'szPassword': password,
+ 'isSaveId': 'false',
+ 'szScriptVar': 'oLoginRet',
+ 'szAction': '',
+ }
+
+ response = self._download_json(
+ 'https://login.afreecatv.com/app/LoginAction.php', None,
+ 'Logging in', data=urlencode_postdata(login_form))
+
+ _ERRORS = {
+ -4: 'Your account has been suspended due to a violation of our terms and policies.',
+ -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
+ -6: 'https://login.afreecatv.com/membership/changeMember.php',
+ -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
+ -9: 'https://member.afreecatv.com/app/pop_login_block.php',
+ -11: 'https://login.afreecatv.com/afreeca/second_login.php',
+ -12: 'https://member.afreecatv.com/app/user_security.php',
+ 0: 'The username does not exist or you have entered the wrong password.',
+ -1: 'The username does not exist or you have entered the wrong password.',
+ -3: 'You have entered your username/password incorrectly.',
+ -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
+ -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
+ -32008: 'You have failed to log in. Please contact our Help Center.',
+ }
+
+ result = int_or_none(response.get('RESULT'))
+ if result != 1:
+ error = _ERRORS.get(result, 'You have failed to log in.')
+ raise ExtractorError(
+ 'Unable to login: %s said: %s' % (self.IE_NAME, error),
+ expected=True)
+
+
+class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv'
IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)
@@ -34,7 +74,6 @@ class AfreecaTVIE(InfoExtractor):
)
(?P<id>\d+)
'''
- _NETRC_MACHINE = 'afreecatv'
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@@ -87,6 +126,7 @@ class AfreecaTVIE(InfoExtractor):
'uploader': '♥이슬이',
'uploader_id': 'dasl8121',
'upload_date': '20170411',
+ 'timestamp': 1491929865,
'duration': 213,
},
'params': {
@@ -120,219 +160,102 @@ class AfreecaTVIE(InfoExtractor):
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
- 'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
+ 'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
'upload_date': '20230108',
+ 'timestamp': 1673218805,
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
+ }, {
+ # adult content
+ 'url': 'https://vod.afreecatv.com/player/70395877',
+ 'only_matching': True,
+ }, {
+ # subscribers only
+ 'url': 'https://vod.afreecatv.com/player/104647403',
+ 'only_matching': True,
+ }, {
+ # private
+ 'url': 'https://vod.afreecatv.com/player/81669846',
+ 'only_matching': True,
}]
- @staticmethod
- def parse_video_key(key):
- video_key = {}
- m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
- if m:
- video_key['upload_date'] = m.group('upload_date')
- video_key['part'] = int(m.group('part'))
- return video_key
-
- def _perform_login(self, username, password):
- login_form = {
- 'szWork': 'login',
- 'szType': 'json',
- 'szUid': username,
- 'szPassword': password,
- 'isSaveId': 'false',
- 'szScriptVar': 'oLoginRet',
- 'szAction': '',
- }
-
- response = self._download_json(
- 'https://login.afreecatv.com/app/LoginAction.php', None,
- 'Logging in', data=urlencode_postdata(login_form))
-
- _ERRORS = {
- -4: 'Your account has been suspended due to a violation of our terms and policies.',
- -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
- -6: 'https://login.afreecatv.com/membership/changeMember.php',
- -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
- -9: 'https://member.afreecatv.com/app/pop_login_block.php',
- -11: 'https://login.afreecatv.com/afreeca/second_login.php',
- -12: 'https://member.afreecatv.com/app/user_security.php',
- 0: 'The username does not exist or you have entered the wrong password.',
- -1: 'The username does not exist or you have entered the wrong password.',
- -3: 'You have entered your username/password incorrectly.',
- -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
- -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
- -32008: 'You have failed to log in. Please contact our Help Center.',
- }
-
- result = int_or_none(response.get('RESULT'))
- if result != 1:
- error = _ERRORS.get(result, 'You have failed to log in.')
- raise ExtractorError(
- 'Unable to login: %s said: %s' % (self.IE_NAME, error),
- expected=True)
-
def _real_extract(self, url):
video_id = self._match_id(url)
-
- partial_view = False
- adult_view = False
- for _ in range(2):
- data = self._download_json(
- 'https://api.m.afreecatv.com/station/video/a/view',
- video_id, headers={'Referer': url}, data=urlencode_postdata({
- 'nTitleNo': video_id,
- 'nApiLevel': 10,
- }))['data']
- if traverse_obj(data, ('code', {int})) == -6221:
- raise ExtractorError('The VOD does not exist', expected=True)
- query = {
+ data = self._download_json(
+ 'https://api.m.afreecatv.com/station/video/a/view', video_id,
+ headers={'Referer': url}, data=urlencode_postdata({
'nTitleNo': video_id,
- 'nStationNo': data['station_no'],
- 'nBbsNo': data['bbs_no'],
- }
- if partial_view:
- query['partialView'] = 'SKIP_ADULT'
- if adult_view:
- query['adultView'] = 'ADULT_VIEW'
- video_xml = self._download_xml(
- 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
- video_id, 'Downloading video info XML%s'
- % (' (skipping adult)' if partial_view else ''),
- video_id, headers={
- 'Referer': url,
- }, query=query)
-
- flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
- if flag and flag == 'SUCCEED':
- break
- if flag == 'PARTIAL_ADULT':
- self.report_warning(
- 'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
- 'Only content suitable for all ages will be downloaded. '
- 'Provide account credentials if you wish to download restricted content.')
- partial_view = True
- continue
- elif flag == 'ADULT':
- if not adult_view:
- adult_view = True
- continue
- error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
- else:
- error = flag
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, error), expected=True)
- else:
- raise ExtractorError('Unable to download video info')
-
- video_element = video_xml.findall('./track/video')[-1]
- if video_element is None or video_element.text is None:
- raise ExtractorError(
- 'Video %s does not exist' % video_id, expected=True)
-
- video_url = video_element.text.strip()
-
- title = xpath_text(video_xml, './track/title', 'title', fatal=True)
-
- uploader = xpath_text(video_xml, './track/nickname', 'uploader')
- uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
- duration = int_or_none(xpath_text(
- video_xml, './track/duration', 'duration'))
- thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
-
- common_entry = {
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'thumbnail': thumbnail,
- }
-
- info = common_entry.copy()
- info.update({
- 'id': video_id,
- 'title': title,
- 'duration': duration,
+ 'nApiLevel': 10,
+ }))['data']
+
+ error_code = traverse_obj(data, ('code', {int}))
+ if error_code == -6221:
+ raise ExtractorError('The VOD does not exist', expected=True)
+ elif error_code == -6205:
+ raise ExtractorError('This VOD is private', expected=True)
+
+ common_info = traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'uploader': ('writer_nick', {str}),
+ 'uploader_id': ('bj_id', {str}),
+ 'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
+ 'thumbnail': ('thumb', {url_or_none}),
})
- if not video_url:
- entries = []
- file_elements = video_element.findall('./file')
- one = len(file_elements) == 1
- for file_num, file_element in enumerate(file_elements, start=1):
- file_url = url_or_none(file_element.text)
- if not file_url:
- continue
- key = file_element.get('key', '')
- upload_date = unified_strdate(self._search_regex(
- r'^(\d{8})_', key, 'upload date', default=None))
- if upload_date is not None:
- # sometimes the upload date isn't included in the file name
- # instead, another random ID is, which may parse as a valid
- # date but be wildly out of a reasonable range
- parsed_date = date_from_str(upload_date)
- if parsed_date.year < 2000 or parsed_date.year >= 2100:
- upload_date = None
- file_duration = int_or_none(file_element.get('duration'))
- format_id = key if key else '%s_%s' % (video_id, file_num)
- if determine_ext(file_url) == 'm3u8':
- formats = self._extract_m3u8_formats(
- file_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls',
- note='Downloading part %d m3u8 information' % file_num)
- else:
- formats = [{
- 'url': file_url,
- 'format_id': 'http',
- }]
- if not formats and not self.get_param('ignore_no_formats'):
- continue
- file_info = common_entry.copy()
- file_info.update({
- 'id': format_id,
- 'title': title if one else '%s (part %d)' % (title, file_num),
- 'upload_date': upload_date,
- 'duration': file_duration,
- 'formats': formats,
+ entries = []
+ for file_num, file_element in enumerate(
+ traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1):
+ file_url = file_element['file']
+ if determine_ext(file_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ file_url, video_id, 'mp4', m3u8_id='hls',
+ note=f'Downloading part {file_num} m3u8 information')
+ else:
+ formats = [{
+ 'url': file_url,
+ 'format_id': 'http',
+ }]
+
+ entries.append({
+ **common_info,
+ 'id': file_element.get('file_info_key') or f'{video_id}_{file_num}',
+ 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
+ 'formats': formats,
+ **traverse_obj(file_element, {
+ 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
+ 'timestamp': ('file_start', {unified_timestamp}),
})
- entries.append(file_info)
- entries_info = info.copy()
- entries_info.update({
- '_type': 'multi_video',
- 'entries': entries,
})
- return entries_info
-
- info = {
- 'id': video_id,
- 'title': title,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'duration': duration,
- 'thumbnail': thumbnail,
- }
- if determine_ext(video_url) == 'm3u8':
- info['formats'] = self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- else:
- app, playpath = video_url.split('mp4:')
- info.update({
- 'url': app,
- 'ext': 'flv',
- 'play_path': 'mp4:' + playpath,
- 'rtmp_live': True, # downloading won't end without this
- })
+ if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
+ if not entries:
+ self.raise_login_required(
+ 'Only users older than 19 are able to watch this video', method='password')
+ self.report_warning(
+ 'In accordance with local laws and regulations, underage users are '
+ 'restricted from watching adult content. Only content suitable for all '
+ f'ages will be downloaded. {self._login_hint("password")}')
+
+ if not entries and traverse_obj(data, ('sub_upload_type', {str})):
+ self.raise_login_required('This VOD is for subscribers only', method='password')
+
+ if len(entries) == 1:
+ return {
+ **entries[0],
+ 'title': common_info.get('title'),
+ }
- return info
+ common_info['timestamp'] = traverse_obj(entries, (..., 'timestamp'), get_all=False)
+ return self.playlist_result(entries, video_id, multi_video=True, **common_info)
-class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
+class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:live'
+ IE_DESC = 'afreecatv.com livestreams'
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
_TESTS = [{
'url': 'https://play.afreecatv.com/pyh3646/237852185',
@@ -347,77 +270,97 @@ class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
},
'skip': 'Livestream has ended',
}, {
- 'url': 'http://play.afreeca.com/pyh3646/237852185',
+ 'url': 'https://play.afreecatv.com/pyh3646/237852185',
'only_matching': True,
}, {
- 'url': 'http://play.afreeca.com/pyh3646',
+ 'url': 'https://play.afreecatv.com/pyh3646',
'only_matching': True,
}]
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
-
- _QUALITIES = ('sd', 'hd', 'hd2k', 'original')
+ _WORKING_CDNS = [
+ 'gcp_cdn', # live-global-cdn-v02.afreecatv.com
+ 'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
+ 'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
+ 'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
+ ]
+ _BAD_CDNS = [
+ 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
+ 'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
+ 'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
+ 'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
+ 'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
+ ]
+
+ def _extract_formats(self, channel_info, broadcast_no, aid):
+ stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
+
+ # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
+ default_cdn_ids = orderedSet([
+ *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
+ *self._WORKING_CDNS,
+ ])
+ cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
+
+ for attempt, cdn_id in enumerate(cdn_ids, start=1):
+ m3u8_url = traverse_obj(self._download_json(
+ urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
+ f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
+ fatal=False, query={
+ 'return_type': cdn_id,
+ 'broad_key': f'{broadcast_no}-common-master-hls',
+ }), ('view_url', {url_or_none}))
+ try:
+ return self._extract_m3u8_formats(
+ m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
+ headers={'Referer': 'https://play.afreecatv.com/'})
+ except ExtractorError as e:
+ if attempt == len(cdn_ids):
+ raise
+ self.report_warning(
+ f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
def _real_extract(self, url):
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
- password = self.get_param('videopassword')
+ channel_info = traverse_obj(self._download_json(
+ self._LIVE_API_URL, broadcaster_id, data=urlencode_postdata({'bid': broadcaster_id})),
+ ('CHANNEL', {dict})) or {}
- info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
- data=urlencode_postdata({'bid': broadcaster_id})) or {}
- channel_info = info.get('CHANNEL') or {}
broadcaster_id = channel_info.get('BJID') or broadcaster_id
broadcast_no = channel_info.get('BNO') or broadcast_no
- password_protected = channel_info.get('BPWD')
if not broadcast_no:
- raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
- if password_protected == 'Y' and password is None:
+ raise UserNotLive(video_id=broadcaster_id)
+
+ password = self.get_param('videopassword')
+ if channel_info.get('BPWD') == 'Y' and password is None:
raise ExtractorError(
'This livestream is protected by a password, use the --video-password option',
expected=True)
- formats = []
- quality_key = qualities(self._QUALITIES)
- for quality_str in self._QUALITIES:
- params = {
+ token_info = traverse_obj(self._download_json(
+ self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
+ 'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
'bno': broadcast_no,
'stream_type': 'common',
'type': 'aid',
- 'quality': quality_str,
- }
- if password is not None:
- params['pwd'] = password
- aid_response = self._download_json(
- self._LIVE_API_URL, broadcast_no, fatal=False,
- data=urlencode_postdata(params),
- note=f'Downloading access token for {quality_str} stream',
- errnote=f'Unable to download access token for {quality_str} stream')
- aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
- if not aid:
- continue
-
- stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
- stream_info = self._download_json(
- f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
- query={
- 'return_type': channel_info.get('CDN', 'gcp_cdn'),
- 'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
- },
- note=f'Downloading metadata for {quality_str} stream',
- errnote=f'Unable to download metadata for {quality_str} stream') or {}
-
- if stream_info.get('view_url'):
- formats.append({
- 'format_id': quality_str,
- 'url': update_url_query(stream_info['view_url'], {'aid': aid}),
- 'ext': 'mp4',
- 'protocol': 'm3u8',
- 'quality': quality_key(quality_str),
- })
-
- station_info = self._download_json(
+ 'quality': 'master',
+ 'pwd': password,
+ }))), ('CHANNEL', {dict})) or {}
+ aid = token_info.get('AID')
+ if not aid:
+ result = token_info.get('RESULT')
+ if result == 0:
+ raise ExtractorError('This livestream has ended', expected=True)
+ elif result == -6:
+ self.raise_login_required('This livestream is for subscribers only', method='password')
+ raise ExtractorError('Unable to extract access token')
+
+ formats = self._extract_formats(channel_info, broadcast_no, aid)
+
+ station_info = traverse_obj(self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
- query={'szBjId': broadcaster_id}, fatal=False,
- note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}
+ 'Downloading channel metadata', 'Unable to download channel metadata',
+ query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
return {
'id': broadcast_no,
@@ -427,6 +370,7 @@ class AfreecaTVLiveIE(AfreecaTVIE): # XXX: Do not subclass from concrete IE
'timestamp': unified_timestamp(station_info.get('broad_start')),
'formats': formats,
'is_live': True,
+ 'http_headers': {'Referer': url},
}
diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py
index 0b73a96..6cc63cd 100644
--- a/yt_dlp/extractor/airtv.py
+++ b/yt_dlp/extractor/airtv.py
@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
- traverse_obj
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py
index 87219f2..49df4bf 100644
--- a/yt_dlp/extractor/allstar.py
+++ b/yt_dlp/extractor/allstar.py
@@ -12,7 +12,6 @@ from ..utils import (
)
from ..utils.traversal import traverse_obj
-
_FIELDS = '''
_id
clipImageSource
diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py
index 8d5b472..f927965 100644
--- a/yt_dlp/extractor/alphaporno.py
+++ b/yt_dlp/extractor/alphaporno.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
- parse_iso8601,
+ int_or_none,
parse_duration,
parse_filesize,
- int_or_none,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py
index bfe066b..cb2b989 100644
--- a/yt_dlp/extractor/alura.py
+++ b/yt_dlp/extractor/alura.py
@@ -1,17 +1,13 @@
import re
from .common import InfoExtractor
-
-from ..compat import (
- compat_urlparse,
-)
-
+from ..compat import compat_urlparse
from ..utils import (
+ ExtractorError,
+ clean_html,
+ int_or_none,
urlencode_postdata,
urljoin,
- int_or_none,
- clean_html,
- ExtractorError
)
@@ -39,7 +35,7 @@ class AluraIE(InfoExtractor):
def _real_extract(self, url):
- course, video_id = self._match_valid_url(url)
+ course, video_id = self._match_valid_url(url).group('course_name', 'id')
video_url = self._VIDEO_URL % (course, video_id)
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
@@ -52,7 +48,7 @@ class AluraIE(InfoExtractor):
formats = []
for video_obj in video_dict:
- video_url_m3u8 = video_obj.get('link')
+ video_url_m3u8 = video_obj.get('mp4')
video_format = self._extract_m3u8_formats(
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py
index 5018710..509b21a 100644
--- a/yt_dlp/extractor/amara.py
+++ b/yt_dlp/extractor/amara.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
-from .youtube import YoutubeIE
from .vimeo import VimeoIE
+from .youtube import YoutubeIE
from ..utils import (
int_or_none,
parse_iso8601,
diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py
index 0d259c5..6b2bf2d 100644
--- a/yt_dlp/extractor/amp.py
+++ b/yt_dlp/extractor/amp.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py
index 52f2ad0..5e78f37 100644
--- a/yt_dlp/extractor/anchorfm.py
+++ b/yt_dlp/extractor/anchorfm.py
@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
- unified_timestamp
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py
index 306b365..9f5b9b5 100644
--- a/yt_dlp/extractor/angel.py
+++ b/yt_dlp/extractor/angel.py
@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
-from ..utils import url_or_none, merge_dicts
+from ..utils import merge_dicts, url_or_none
class AngelIE(InfoExtractor):
diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py
index d00b0f9..433eb4e 100644
--- a/yt_dlp/extractor/appleconnect.py
+++ b/yt_dlp/extractor/appleconnect.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- str_to_int,
- ExtractorError
-)
+from ..utils import ExtractorError, str_to_int
class AppleConnectIE(InfoExtractor):
diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py
index 2e0b0a8..21103ae 100644
--- a/yt_dlp/extractor/appletrailers.py
+++ b/yt_dlp/extractor/appletrailers.py
@@ -1,5 +1,5 @@
-import re
import json
+import re
from .common import InfoExtractor
from ..compat import compat_urlparse
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 46e68d6..3db59c5 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -1,5 +1,5 @@
+import functools
import re
-from functools import partial
from .common import InfoExtractor
from ..utils import (
@@ -349,7 +349,7 @@ class ARDBetaMediathekIE(InfoExtractor):
r'(?P<title>.*)',
]
- return traverse_obj(patterns, (..., {partial(re.match, string=title)}, {
+ return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, {
'season_number': ('season_number', {int_or_none}),
'episode_number': ('episode_number', {int_or_none}),
'episode': ((
diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py
index a493714..9a5524a 100644
--- a/yt_dlp/extractor/arnes.py
+++ b/yt_dlp/extractor/arnes.py
@@ -4,8 +4,8 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
- format_field,
float_or_none,
+ format_field,
int_or_none,
parse_iso8601,
remove_start,
diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index 1c180b1..46fe006 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -5,6 +5,7 @@ from ..utils import (
ExtractorError,
GeoRestrictedError,
int_or_none,
+ join_nonempty,
parse_iso8601,
parse_qs,
strip_or_none,
@@ -32,20 +33,6 @@ class ArteTVIE(ArteTVBaseIE):
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'only_matching': True,
}, {
- 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
- 'info_dict': {
- 'id': '100103-000-A',
- 'title': 'USA: Dyskryminacja na porodówce',
- 'description': 'md5:242017b7cce59ffae340a54baefcafb1',
- 'alt_title': 'ARTE Reportage',
- 'upload_date': '20201103',
- 'duration': 554,
- 'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
- 'timestamp': 1604417980,
- 'ext': 'mp4',
- },
- 'params': {'skip_download': 'm3u8'}
- }, {
'note': 'No alt_title',
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
'only_matching': True,
@@ -59,6 +46,23 @@ class ArteTVIE(ArteTVBaseIE):
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
'only_matching': True,
}, {
+ 'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
+ 'info_dict': {
+ 'id': '109067-000-A',
+ 'ext': 'mp4',
+ 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
+ 'timestamp': 1713927600,
+ 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
+ 'duration': 7599,
+ 'title': 'La loi de Téhéran',
+ 'upload_date': '20240424',
+ 'subtitles': {
+ 'fr': 'mincount:1',
+ 'fr-acc': 'mincount:1',
+ 'fr-forced': 'mincount:1',
+ },
+ },
+ }, {
'note': 'age-restricted',
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
'info_dict': {
@@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE):
'upload_date': '20230930',
'ext': 'mp4',
},
- }, {
- 'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
- 'info_dict': {
- 'id': '085374-003-A',
- 'ext': 'mp4',
- 'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
- 'timestamp': 1702872000,
- 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
- 'duration': 2594,
- 'title': 'Die kurze Zeit der Jugend',
- 'alt_title': 'Im hohen Norden geboren',
- 'upload_date': '20231218',
- 'subtitles': {
- 'fr': 'mincount:1',
- 'fr-acc': 'mincount:1',
- },
- },
+ 'skip': '404 Not Found',
}]
_GEO_BYPASS = True
@@ -143,16 +131,18 @@ class ArteTVIE(ArteTVBaseIE):
updated_subs = {}
for lang, sub_formats in subs.items():
for fmt in sub_formats:
- if fmt.get('url', '').endswith('-MAL.m3u8'):
- lang += '-acc'
- updated_subs.setdefault(lang, []).append(fmt)
+ url = fmt.get('url') or ''
+ suffix = ('acc' if url.endswith('-MAL.m3u8')
+ else 'forced' if '_VO' not in url
+ else None)
+ updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt)
return updated_subs
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
- langauge_code = self._LANG_MAP.get(lang)
+ language_code = self._LANG_MAP.get(lang)
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
'x-validated-age': '18'
@@ -180,10 +170,10 @@ class ArteTVIE(ArteTVBaseIE):
m = self._VERSION_CODE_RE.match(stream_version_code)
if m:
lang_pref = int(''.join('01'[x] for x in (
- m.group('vlang') == langauge_code, # we prefer voice in the requested language
+ m.group('vlang') == language_code, # we prefer voice in the requested language
not m.group('audio_desc'), # and not the audio description version
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
- m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
+ m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
not m.group('has_sub'), # but we prefer no subtitles otherwise
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
)))
diff --git a/yt_dlp/extractor/asobistage.py b/yt_dlp/extractor/asobistage.py
new file mode 100644
index 0000000..8fa8f3e
--- /dev/null
+++ b/yt_dlp/extractor/asobistage.py
@@ -0,0 +1,154 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import str_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class AsobiStageIE(InfoExtractor):
+ IE_DESC = 'ASOBISTAGE (アソビステージ)'
+ _VALID_URL = r'https?://asobistage\.asobistore\.jp/event/(?P<id>(?P<event>\w+)/(?P<type>archive|player)/(?P<slug>\w+))(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://asobistage.asobistore.jp/event/315passionhour_2022summer/archive/frame',
+ 'info_dict': {
+ 'id': '315passionhour_2022summer/archive/frame',
+ 'title': '315プロダクションプレゼンツ 315パッションアワー!!!',
+ 'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'edff52f2',
+ 'ext': 'mp4',
+ 'title': '315passion_FRAME_only',
+ 'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+ },
+ }],
+ }, {
+ 'url': 'https://asobistage.asobistore.jp/event/idolmaster_idolworld2023_goods/archive/live',
+ 'info_dict': {
+ 'id': 'idolmaster_idolworld2023_goods/archive/live',
+ 'title': 'md5:378510b6e830129d505885908bd6c576',
+ 'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '3aef7110',
+ 'ext': 'mp4',
+ 'title': 'asobistore_station_1020_serverREC',
+ 'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+ },
+ }],
+ }, {
+ 'url': 'https://asobistage.asobistore.jp/event/sidem_fclive_bpct/archive/premium_hc',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'id': 'sidem_fclive_bpct/archive/premium_hc',
+ 'title': '315 Production presents F@NTASTIC COMBINATION LIVE ~BRAINPOWER!!~/~CONNECTIME!!!!~',
+ 'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+ },
+ }, {
+ 'url': 'https://asobistage.asobistore.jp/event/ijigenfes_utagassen/player/day1',
+ 'only_matching': True,
+ }]
+
+ _API_HOST = 'https://asobistage-api.asobistore.jp'
+ _HEADERS = {}
+ _is_logged_in = False
+
+ @functools.cached_property
+ def _owned_tickets(self):
+ owned_tickets = set()
+ if not self._is_logged_in:
+ return owned_tickets
+
+ for path, name in [
+ ('api/v1/purchase_history/list', 'ticket purchase history'),
+ ('api/v1/serialcode/list', 'redemption history'),
+ ]:
+ response = self._download_json(
+ f'{self._API_HOST}/{path}', None, f'Downloading {name}',
+ f'Unable to download {name}', expected_status=400)
+ if traverse_obj(response, ('payload', 'error_message'), 'error') == 'notlogin':
+ self._is_logged_in = False
+ break
+ owned_tickets.update(
+ traverse_obj(response, ('payload', 'value', ..., 'digital_product_id', {str_or_none})))
+
+ return owned_tickets
+
+ def _get_available_channel_id(self, channel):
+ channel_id = traverse_obj(channel, ('chennel_vspf_id', {str}))
+ if not channel_id:
+ return None
+ # if rights_type_id == 6, then 'No conditions (no login required - non-members are OK)'
+ if traverse_obj(channel, ('viewrights', lambda _, v: v['rights_type_id'] == 6)):
+ return channel_id
+ available_tickets = traverse_obj(channel, (
+ 'viewrights', ..., ('tickets', 'serialcodes'), ..., 'digital_product_id', {str_or_none}))
+ if not self._owned_tickets.intersection(available_tickets):
+ self.report_warning(
+ f'You are not a ticketholder for "{channel.get("channel_name") or channel_id}"')
+ return None
+ return channel_id
+
+ def _real_initialize(self):
+ if self._get_cookies(self._API_HOST):
+ self._is_logged_in = True
+ token = self._download_json(
+ f'{self._API_HOST}/api/v1/vspf/token', None, 'Getting token', 'Unable to get token')
+ self._HEADERS['Authorization'] = f'Bearer {token}'
+
+ def _real_extract(self, url):
+ video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
+ video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
+ webpage = self._download_webpage(url, video_id)
+ event_data = traverse_obj(
+ self._search_nextjs_data(webpage, video_id, default={}),
+ ('props', 'pageProps', 'eventCMSData', {
+ 'title': ('event_name', {str}),
+ 'thumbnail': ('event_thumbnail_image', {url_or_none}),
+ }))
+
+ available_channels = traverse_obj(self._download_json(
+ f'https://asobistage.asobistore.jp/cdn/v101/events/{event}/{video_type}.json',
+ video_id, 'Getting channel list', 'Unable to get channel list'), (
+ video_type, lambda _, v: v['broadcast_slug'] == slug,
+ 'channels', lambda _, v: v['chennel_vspf_id'] != '00000'))
+
+ entries = []
+ for channel_id in traverse_obj(available_channels, (..., {self._get_available_channel_id})):
+ if video_type == 'archives':
+ channel_json = self._download_json(
+ f'https://survapi.channel.or.jp/proxy/v1/contents/{channel_id}/get_by_cuid', channel_id,
+ 'Getting archive channel info', 'Unable to get archive channel info', fatal=False,
+ headers=self._HEADERS)
+ channel_data = traverse_obj(channel_json, ('ex_content', {
+ 'm3u8_url': 'streaming_url',
+ 'title': 'title',
+ 'thumbnail': ('thumbnail', 'url'),
+ }))
+ else: # video_type == 'broadcasts'
+ channel_json = self._download_json(
+ f'https://survapi.channel.or.jp/ex/events/{channel_id}', channel_id,
+ 'Getting live channel info', 'Unable to get live channel info', fatal=False,
+ headers=self._HEADERS, query={'embed': 'channel'})
+ channel_data = traverse_obj(channel_json, ('data', {
+ 'm3u8_url': ('Channel', 'Custom_live_url'),
+ 'title': 'Name',
+ 'thumbnail': 'Poster_url',
+ }))
+
+ entries.append({
+ 'id': channel_id,
+ 'title': channel_data.get('title'),
+ 'formats': self._extract_m3u8_formats(channel_data.get('m3u8_url'), channel_id, fatal=False),
+ 'is_live': video_type == 'broadcasts',
+ 'thumbnail': url_or_none(channel_data.get('thumbnail')),
+ })
+
+ if not self._is_logged_in and not entries:
+ self.raise_login_required()
+
+ return self.playlist_result(entries, video_id, **event_data)
diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py
index d6ed9e4..20ee34c 100644
--- a/yt_dlp/extractor/atvat.py
+++ b/yt_dlp/extractor/atvat.py
@@ -1,11 +1,11 @@
-import datetime
+import datetime as dt
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
float_or_none,
jwt_encode_hs256,
try_get,
- ExtractorError,
)
@@ -71,9 +71,9 @@ class ATVAtIE(InfoExtractor):
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
for id, content in enumerate(contentResource)]
- time_of_request = datetime.datetime.now()
- not_before = time_of_request - datetime.timedelta(minutes=5)
- expire = time_of_request + datetime.timedelta(minutes=5)
+ time_of_request = dt.datetime.now()
+ not_before = time_of_request - dt.timedelta(minutes=5)
+ expire = time_of_request + dt.timedelta(minutes=5)
payload = {
'content_ids': {
content_id: content_ids,
diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py
index 6fc938d..a8dfb3e 100644
--- a/yt_dlp/extractor/awaan.py
+++ b/yt_dlp/extractor/awaan.py
@@ -2,8 +2,8 @@ import base64
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlencode,
compat_str,
+ compat_urllib_parse_urlencode,
)
from ..utils import (
format_field,
diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py
index c4741a6..4ebef92 100644
--- a/yt_dlp/extractor/aws.py
+++ b/yt_dlp/extractor/aws.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
import hashlib
import hmac
@@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def _aws_execute_api(self, aws_dict, video_id, query=None):
query = query or {}
- amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
+ amz_date = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
date = amz_date[:8]
headers = {
'Accept': 'application/json',
diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py
index 67af29a..c4e07a7 100644
--- a/yt_dlp/extractor/banbye.py
+++ b/yt_dlp/extractor/banbye.py
@@ -2,12 +2,12 @@ import math
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlparse,
compat_parse_qs,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
- format_field,
InAdvancePagedList,
+ format_field,
traverse_obj,
unified_timestamp,
)
diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py
index 51e7220..82dc9ab 100644
--- a/yt_dlp/extractor/bannedvideo.py
+++ b/yt_dlp/extractor/bannedvideo.py
@@ -2,11 +2,11 @@ import json
from .common import InfoExtractor
from ..utils import (
- try_get,
- int_or_none,
- url_or_none,
float_or_none,
+ int_or_none,
+ try_get,
unified_timestamp,
+ url_or_none,
)
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 015af9e..f6b58b3 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'url': 'http://www.bbc.com/news/world-europe-32668511',
'info_dict': {
'id': 'world-europe-32668511',
- 'title': 'Russia stages massive WW2 parade',
+ 'title': 'Russia stages massive WW2 parade despite Western boycott',
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
},
'playlist_count': 2,
@@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': '3662a707-0af9-3149-963f-47bea720b460',
'title': 'BUGGER',
+ 'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
},
'playlist_count': 18,
}, {
@@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': 'p02mprgb',
'ext': 'mp4',
- 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
- 'description': 'md5:2868290467291b37feda7863f7a83f54',
+ 'title': 'Germanwings crash site aerial video',
+ 'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
'duration': 47,
'timestamp': 1427219242,
'upload_date': '20150324',
+ 'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
},
'params': {
- # rtmp download
'skip_download': True,
}
}, {
@@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': 'now SIMORGH_DATA with no video',
}, {
# single video embedded with data-playable containing XML playlists (regional section)
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
'info_dict': {
- 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'id': '39275083',
+ 'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
'ext': 'mp4',
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
- 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
+ 'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
'timestamp': 1434713142,
'upload_date': '20150619',
+ 'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
},
'params': {
'skip_download': True,
- }
+ },
}, {
# single video from video playlist embedded with vxp-playlist-data JSON
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
@@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': '404 Not Found',
}, {
- # single video story with digitalData
+ # single video story with __PWA_PRELOADED_STATE__
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
'info_dict': {
'id': 'p02q6gc4',
- 'ext': 'flv',
- 'title': 'Sri Lanka’s spicy secret',
- 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
- 'timestamp': 1437674293,
- 'upload_date': '20150723',
+ 'ext': 'mp4',
+ 'title': 'Tasting the spice of life in Jaffna',
+ 'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
+ 'timestamp': 1646058397,
+ 'upload_date': '20220228',
+ 'duration': 255,
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
}, {
# single video story without digitalData
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
@@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'timestamp': 1415867444,
'upload_date': '20141113',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
+ 'skip': 'redirects to TopGear home page',
}, {
# single video embedded with Morph
+ # TODO: replacement test page
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
'info_dict': {
'id': 'p041vhd0',
@@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader': 'BBC Sport',
'uploader_id': 'bbc_sport',
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Georestricted to UK',
+ 'skip': 'Video no longer in page',
}, {
- # single video with playlist.sxml URL in playlist param
+ # single video in __INITIAL_DATA__
'url': 'http://www.bbc.com/sport/0/football/33653409',
'info_dict': {
'id': 'p02xycnp',
'ext': 'mp4',
- 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
- 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
+ 'title': 'Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
+ 'timestamp': 1437750175,
+ 'upload_date': '20150724',
+ 'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
'duration': 140,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
}, {
- # article with multiple videos embedded with playlist.sxml in playlist param
+ # article with multiple videos embedded with Morph.setPayload
'url': 'http://www.bbc.com/sport/0/football/34475836',
'info_dict': {
'id': '34475836',
@@ -755,6 +751,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'playlist_count': 3,
}, {
+ # Testing noplaylist
+ 'url': 'http://www.bbc.com/sport/0/football/34475836',
+ 'info_dict': {
+ 'id': 'p034ppnv',
+ 'ext': 'mp4',
+ 'title': 'All you need to know about Jurgen Klopp',
+ 'timestamp': 1444335081,
+ 'upload_date': '20151008',
+ 'duration': 122.0,
+ 'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }, {
# school report article with single video
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
'info_dict': {
@@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'title': 'School which breaks down barriers in Jerusalem',
},
'playlist_count': 1,
+ 'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
}, {
# single video with playlist URL from weather section
'url': 'http://www.bbc.com/weather/features/33601775',
@@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1437785037,
'upload_date': '20150725',
+ 'duration': 105,
},
}, {
# video with window.__INITIAL_DATA__ and value as JSON string
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
'info_dict': {
- 'id': 'p0b71qth',
+ 'id': 'p0b779gc',
'ext': 'mp4',
'title': 'Why France is making this woman a national hero',
- 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+ 'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
'thumbnail': r're:https?://.+/.+\.jpg',
- 'timestamp': 1638230731,
- 'upload_date': '20211130',
+ 'timestamp': 1638215626,
+ 'upload_date': '20211129',
+ 'duration': 125,
+ },
+ }, {
+ # video with script id __NEXT_DATA__ and value as JSON string
+ 'url': 'https://www.bbc.com/news/uk-68546268',
+ 'info_dict': {
+ 'id': 'p0hj0lq7',
+ 'ext': 'mp4',
+ 'title': 'Nasser Hospital doctor describes his treatment by IDF',
+ 'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ 'timestamp': 1710188248,
+ 'upload_date': '20240311',
+ 'duration': 104,
},
}, {
# single video article embedded with data-media-vpid
@@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
+ 'skip': '404 Not Found',
}, {
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
'info_dict': {
@@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'ext': 'mp4',
'title': 'md5:2fabf12a726603193a2879a055f72514',
'description': 'Learn English words and phrases from this story',
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
},
'add_ie': [BBCCoUkIE.ie_key()],
}, {
@@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': 'p07c6sb9',
'ext': 'mp4',
- 'title': 'How positive thinking is harming your happiness',
- 'alt_title': 'The downsides of positive thinking',
- 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+ 'title': 'The downsides of positive thinking',
+ 'description': 'The downsides of positive thinking',
'duration': 235,
- 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
- 'upload_date': '20190604',
- 'categories': ['Psychology'],
+ 'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
+ 'upload_date': '20220223',
+ 'timestamp': 1645632746,
},
}, {
# BBC Sounds
- 'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+ 'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
'info_dict': {
- 'id': 'm001q789',
+ 'id': 'p0hrw4nr',
'ext': 'mp4',
- 'title': 'The Night Tracks Mix - Music for the darkling hour',
- 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
- 'chapters': 'count:8',
- 'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
- 'uploader': 'Radio 3',
- 'duration': 1800,
- 'uploader_id': 'bbc_radio_three',
- },
+ 'title': 'Are our coastlines being washed away?',
+ 'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
+ 'timestamp': 1713556800,
+ 'upload_date': '20240419',
+ 'duration': 1588,
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
+ 'uploader': 'World Service',
+ 'uploader_id': 'bbc_world_service',
+ 'series': 'CrowdScience',
+ 'chapters': [],
+ }
}, { # onion routes
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
'only_matching': True,
@@ -1008,8 +1039,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
webpage, 'group id', default=None)
if group_id:
return self.url_result(
- 'https://www.bbc.co.uk/programmes/%s' % group_id,
- ie=BBCCoUkIE.ie_key())
+ f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex(
@@ -1069,83 +1099,133 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
}
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
- # There are several setPayload calls may be present but the video
- # seems to be always related to the first one
- morph_payload = self._parse_json(
- self._search_regex(
- r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
- webpage, 'morph payload', default='{}'),
- playlist_id, fatal=False)
+ # Several setPayload calls may be present but the video(s)
+ # should be in one that mentions leadMedia or videoData
+ morph_payload = self._search_json(
+ r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
+ contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
+ default={})
if morph_payload:
- components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
- for component in components:
- if not isinstance(component, dict):
- continue
- lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
- if not lead_media:
- continue
- identifiers = lead_media.get('identifiers')
- if not identifiers or not isinstance(identifiers, dict):
- continue
- programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
+ for lead_media in traverse_obj(morph_payload, (
+ 'body', 'components', ..., 'props', 'leadMedia', {dict})):
+ programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
if not programme_id:
continue
- title = lead_media.get('title') or self._og_search_title(webpage)
formats, subtitles = self._download_media_selector(programme_id)
- description = lead_media.get('summary')
- uploader = lead_media.get('masterBrand')
- uploader_id = lead_media.get('mid')
- duration = None
- duration_d = lead_media.get('duration')
- if isinstance(duration_d, dict):
- duration = parse_duration(dict_get(
- duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
return {
'id': programme_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
+ 'title': lead_media.get('title') or self._og_search_title(webpage),
+ **traverse_obj(lead_media, {
+ 'description': ('summary', {str}),
+ 'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
+ 'uploader': ('masterBrand', {str}),
+ 'uploader_id': ('mid', {str}),
+ }),
'formats': formats,
'subtitles': subtitles,
}
+ body = self._parse_json(traverse_obj(morph_payload, (
+ 'body', 'content', 'article', 'body')), playlist_id, fatal=False)
+ for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
+ if video_data.get('vpid'):
+ video_id = video_data['vpid']
+ formats, subtitles = self._download_media_selector(video_id)
+ entry = {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ else:
+ video_id = video_data['pid']
+ entry = self.url_result(
+ f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
+ video_id, url_transparent=True)
+ entry.update({
+ 'timestamp': traverse_obj(morph_payload, (
+ 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
+ ),
+ **traverse_obj(video_data, {
+ 'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
+ 'title': (('title', 'caption'), {str}, any),
+ 'duration': ('duration', {parse_duration}),
+ }),
+ })
+ if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
+ return entry
+ entries.append(entry)
+ if entries:
+ playlist_title = traverse_obj(morph_payload, (
+ 'body', 'content', 'article', 'headline', {str})) or playlist_title
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
- preload_state = self._parse_json(self._search_regex(
- r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), playlist_id, fatal=False)
- if preload_state:
- current_programme = preload_state.get('programmes', {}).get('current') or {}
- programme_id = current_programme.get('id')
- if current_programme and programme_id and current_programme.get('type') == 'playable_item':
- title = current_programme.get('titles', {}).get('tertiary') or playlist_title
- formats, subtitles = self._download_media_selector(programme_id)
- synopses = current_programme.get('synopses') or {}
- network = current_programme.get('network') or {}
- duration = int_or_none(
- current_programme.get('duration', {}).get('value'))
- thumbnail = None
- image_url = current_programme.get('image_url')
- if image_url:
- thumbnail = image_url.replace('{recipe}', 'raw')
+ # various PRELOADED_STATE JSON
+ preload_state = self._search_json(
+ r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
+ 'preload state', playlist_id, transform_source=js_to_json, default={})
+ # PRELOADED_STATE with current programme
+ current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
+ programme_id = traverse_obj(current_programme, ('id', {str}))
+ if programme_id and current_programme.get('type') == 'playable_item':
+ title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
+ formats, subtitles = self._download_media_selector(programme_id)
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'formats': formats,
+ **traverse_obj(current_programme, {
+ 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
+ 'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
+ 'duration': ('duration', 'value', {int_or_none}),
+ 'uploader': ('network', 'short_title', {str}),
+ 'uploader_id': ('network', 'id', {str}),
+ 'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
+ 'series': ('titles', 'primary', {str}),
+ }),
+ 'subtitles': subtitles,
+ 'chapters': traverse_obj(preload_state, (
+ 'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
+ 'title': ('titles', {lambda x: join_nonempty(
+ 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+ 'start_time': ('offset', 'start', {float_or_none}),
+ 'end_time': ('offset', 'end', {float_or_none}),
+ })
+ ),
+ }
+
+ # PWA_PRELOADED_STATE with article video asset
+ asset_id = traverse_obj(preload_state, (
+ 'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
+ 'assetVideo', 0, {str}, any))
+ if asset_id:
+ video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
+ if video_id:
+ article = traverse_obj(preload_state, (
+ 'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
+
+ def image_url(image_id):
+ return traverse_obj(preload_state, (
+ 'entities', 'images', image_id, 'url',
+ {lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
+
+ formats, subtitles = self._download_media_selector(video_id)
return {
- 'id': programme_id,
- 'title': title,
- 'description': dict_get(synopses, ('long', 'medium', 'short')),
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'uploader': network.get('short_title'),
- 'uploader_id': network.get('id'),
+ 'id': video_id,
+ **traverse_obj(preload_state, ('entities', 'videos', asset_id, {
+ 'title': ('title', {str}),
+ 'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
+ 'thumbnail': (0, {image_url}),
+ 'duration': ('duration', {int_or_none}),
+ })),
'formats': formats,
'subtitles': subtitles,
- 'chapters': traverse_obj(preload_state, (
- 'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
- 'title': ('titles', {lambda x: join_nonempty(
- 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
- 'start_time': ('offset', 'start', {float_or_none}),
- 'end_time': ('offset', 'end', {float_or_none}),
- })) or None,
+ 'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
}
+ else:
+ return self.url_result(
+ f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
+ asset_id, playlist_title, display_id=playlist_id,
+ description=playlist_description)
bbc3_config = self._parse_json(
self._search_regex(
@@ -1191,6 +1271,28 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
+ def parse_model(model):
+ """Extract single video from model structure"""
+ item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
+ if not item_id:
+ return
+ formats, subtitles = self._download_media_selector(item_id)
+ return {
+ 'id': item_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(model, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
+ 'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
+ 'duration': ('versions', 0, 'duration', {int}),
+ 'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
+ })
+ }
+
+ def is_type(*types):
+ return lambda _, v: v['type'] in types
+
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
'quoted preload state', default=None)
@@ -1202,6 +1304,19 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
if initial_data:
+ for video_data in traverse_obj(initial_data, (
+ 'stores', 'article', 'articleBodyContent', is_type('video'))):
+ model = traverse_obj(video_data, (
+ 'model', 'blocks', is_type('aresMedia'),
+ 'model', 'blocks', is_type('aresMediaMetadata'),
+ 'model', {dict}, any))
+ entry = parse_model(model)
+ if entry:
+ entries.append(entry)
+ if entries:
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
def parse_media(media):
if not media:
return
@@ -1234,27 +1349,90 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'subtitles': subtitles,
'timestamp': item_time,
'description': strip_or_none(item_desc),
+ 'duration': int_or_none(item.get('duration')),
})
- for resp in (initial_data.get('data') or {}).values():
- name = resp.get('name')
+
+ for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
+ name = resp['name']
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
- for block in (try_get(resp,
- (lambda x: x['data']['blocks'],
- lambda x: x['data']['content']['model']['blocks'],),
- list) or []):
- if block.get('type') not in ['media', 'video']:
- continue
- parse_media(block.get('model'))
+ for block in traverse_obj(resp, (
+ 'data', (None, ('content', 'model')), 'blocks',
+ is_type('media', 'video'), 'model', {dict})):
+ parse_media(block)
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
+ # extract from SIMORGH_DATA hydration JSON
+ simorgh_data = self._search_json(
+ r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
+ 'simorgh data', playlist_id, default={})
+ if simorgh_data:
+ done = False
+ for video_data in traverse_obj(simorgh_data, (
+ 'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
+ model = traverse_obj(video_data, (
+ 'model', 'blocks', is_type('aresMedia'),
+ 'model', 'blocks', is_type('aresMediaMetadata'),
+ 'model', {dict}, any))
+ if video_data['type'] == 'video':
+ entry = parse_model(model)
+ else: # legacyMedia: no duration, subtitles
+ block_id, entry = traverse_obj(model, ('blockId', {str})), None
+ media_data = traverse_obj(simorgh_data, (
+ 'pageData', 'promo', 'media',
+ {lambda x: x if x['id'] == block_id else None}))
+ formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
+ 'url': ('url', {url_or_none}),
+ 'ext': ('format', {str}),
+ 'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
+ }))
+ if formats:
+ entry = {
+ 'id': block_id,
+ 'display_id': playlist_id,
+ 'formats': formats,
+ 'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
+ **traverse_obj(model, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
+ 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
+ 'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
+ }),
+ }
+ done = True
+ if entry:
+ entries.append(entry)
+ if done:
+ break
+ if entries:
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),
re.findall(pattern, webpage))))
+ # US accessed article with single embedded video (e.g.
+ # https://www.bbc.com/news/uk-68546268)
+ next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
+ ('props', 'pageProps', 'page'))
+ model = traverse_obj(next_data, (
+ ..., 'contents', is_type('video'),
+ 'model', 'blocks', is_type('media'),
+ 'model', 'blocks', is_type('mediaMetadata'),
+ 'model', {dict}, any))
+ if model and (entry := parse_model(model)):
+ if not entry.get('timestamp'):
+ entry['timestamp'] = traverse_obj(next_data, (
+ ..., 'contents', is_type('timestamp'), 'model',
+ 'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
+ entries.append(entry)
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
# Multiple video article (e.g.
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py
index 042b322..da98ac3 100644
--- a/yt_dlp/extractor/beeg.py
+++ b/yt_dlp/extractor/beeg.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
str_or_none,
diff --git a/yt_dlp/extractor/bibeltv.py b/yt_dlp/extractor/bibeltv.py
index 34464da..666b51c 100644
--- a/yt_dlp/extractor/bibeltv.py
+++ b/yt_dlp/extractor/bibeltv.py
@@ -1,4 +1,4 @@
-from functools import partial
+import functools
from .common import InfoExtractor
from ..utils import (
@@ -50,7 +50,7 @@ class BibelTVBaseIE(InfoExtractor):
**traverse_obj(data, {
'title': 'title',
'description': 'description',
- 'duration': ('duration', {partial(int_or_none, scale=1000)}),
+ 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
'timestamp': ('schedulingStart', {parse_iso8601}),
'season_number': 'seasonNumber',
'episode_number': 'episodeNumber',
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index fee4b29..b38c90b 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor):
return formats
- def _download_playinfo(self, video_id, cid):
+ def _download_playinfo(self, video_id, cid, headers=None):
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
- note=f'Downloading video formats for cid {cid}')['data']
+ note=f'Downloading video formats for cid {cid}', headers=headers)['data']
def json2srt(self, json_data):
srt_data = ''
@@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage, urlh = self._download_webpage_handle(url, video_id)
+ headers = self.geo_verification_headers()
+ webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
@@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE):
self._download_json(
'https://api.bilibili.com/x/player/pagelist', video_id,
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
- note='Extracting videos in anthology'),
+ note='Extracting videos in anthology', headers=headers),
'data', expected_type=list) or []
is_anthology = len(page_list_json) > 1
@@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
festival_info = {}
if is_festival:
- play_info = self._download_playinfo(video_id, cid)
+ play_info = self._download_playinfo(video_id, cid, headers=headers)
festival_info = traverse_obj(initial_state, {
'uploader': ('videoInfo', 'upName'),
@@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
def _real_extract(self, url):
episode_id = self._match_id(url)
- webpage = self._download_webpage(url, episode_id)
+ headers = self.geo_verification_headers()
+ webpage = self._download_webpage(url, episode_id, headers=headers)
if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted')
elif '正在观看预览,大会员免费看全片' in webpage:
self.raise_login_required('This video is for premium members only')
- headers = {'Referer': url, **self.geo_verification_headers()}
+ headers['Referer'] = url
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
@@ -724,7 +726,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
'__post_extractor': self.extract_comments(aid),
- 'http_headers': headers,
+ 'http_headers': {'Referer': url},
}
@@ -1043,15 +1045,17 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
try:
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
- playlist_id, note=f'Downloading page {page_idx}', query=query)
+ playlist_id, note=f'Downloading page {page_idx}', query=query,
+ headers={'referer': url})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise
- if response['code'] == -401:
+ if response['code'] in (-352, -401):
raise ExtractorError(
- 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+ f'Request is blocked by server ({-response["code"]}), '
+ 'please add cookies, wait and try later.', expected=True)
return response['data']
def get_metadata(page_data):
diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py
index e875957..aa3d63e 100644
--- a/yt_dlp/extractor/bleacherreport.py
+++ b/yt_dlp/extractor/bleacherreport.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .amp import AMPIE
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py
index 3d6e033..ef0151d 100644
--- a/yt_dlp/extractor/blogger.py
+++ b/yt_dlp/extractor/blogger.py
@@ -1,3 +1,4 @@
+from .common import InfoExtractor
from ..utils import (
mimetype2ext,
parse_duration,
@@ -5,7 +6,6 @@ from ..utils import (
str_or_none,
traverse_obj,
)
-from .common import InfoExtractor
class BloggerIE(InfoExtractor):
diff --git a/yt_dlp/extractor/boosty.py b/yt_dlp/extractor/boosty.py
index fb14ca1..d3aab7a 100644
--- a/yt_dlp/extractor/boosty.py
+++ b/yt_dlp/extractor/boosty.py
@@ -1,7 +1,11 @@
+import json
+import urllib.parse
+
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
+ bug_reports_message,
int_or_none,
qualities,
str_or_none,
@@ -162,9 +166,19 @@ class BoostyIE(InfoExtractor):
def _real_extract(self, url):
user, post_id = self._match_valid_url(url).group('user', 'post_id')
+
+ auth_headers = {}
+ auth_cookie = self._get_cookies('https://boosty.to/').get('auth')
+ if auth_cookie is not None:
+ try:
+ auth_data = json.loads(urllib.parse.unquote(auth_cookie.value))
+ auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
+ except (json.JSONDecodeError, KeyError):
+ self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}')
+
post = self._download_json(
f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
- note='Downloading post data', errnote='Unable to download post data')
+ note='Downloading post data', errnote='Unable to download post data', headers=auth_headers)
post_title = post.get('title')
if not post_title:
@@ -202,7 +216,9 @@ class BoostyIE(InfoExtractor):
'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
}, get_all=False)})
- if not entries:
+ if not entries and not post.get('hasAccess'):
+ self.raise_login_required('This post requires a subscription', metadata_available=True)
+ elif not entries:
raise ExtractorError('No videos found', expected=True)
if len(entries) == 1:
return entries[0]
diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py
index 92f8ea2..2675866 100644
--- a/yt_dlp/extractor/bostonglobe.py
+++ b/yt_dlp/extractor/bostonglobe.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
extract_attributes,
)
diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py
index 7281b3c..008c011 100644
--- a/yt_dlp/extractor/box.py
+++ b/yt_dlp/extractor/box.py
@@ -3,6 +3,7 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
parse_iso8601,
update_url_query,
url_or_none,
@@ -11,8 +12,8 @@ from ..utils.traversal import traverse_obj
class BoxIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)/file/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
+ _TESTS = [{
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
'info_dict': {
@@ -25,14 +26,36 @@ class BoxIE(InfoExtractor):
'uploader_id': '235196876',
},
'params': {'skip_download': 'dash fragment too small'},
- }
+ }, {
+ 'url': 'https://utexas.app.box.com/s/2x6vanv85fdl8j2eqlcxmv0gp1wvps6e',
+ 'info_dict': {
+ 'id': '787379022466',
+ 'ext': 'mp4',
+ 'title': 'Webinar recording: Take the Leap!.mp4',
+ 'uploader': 'Patricia Mosele',
+ 'timestamp': 1615824864,
+ 'upload_date': '20210315',
+ 'uploader_id': '239068974',
+ },
+ 'params': {'skip_download': 'dash fragment too small'},
+ }]
def _real_extract(self, url):
shared_name, file_id = self._match_valid_url(url).groups()
- webpage = self._download_webpage(url, file_id)
- request_token = self._parse_json(self._search_regex(
- r'Box\.config\s*=\s*({.+?});', webpage,
- 'Box config'), file_id)['requestToken']
+ webpage = self._download_webpage(url, file_id or shared_name)
+
+ if not file_id:
+ post_stream_data = self._search_json(
+ r'Box\.postStreamData\s*=', webpage, 'Box post-stream data', shared_name)
+ shared_item = traverse_obj(
+ post_stream_data, ('/app-api/enduserapp/shared-item', {dict})) or {}
+ if shared_item.get('itemType') != 'file':
+ raise ExtractorError('The requested resource is not a file', expected=True)
+
+ file_id = str(shared_item['itemID'])
+
+ request_token = self._search_json(
+ r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
access_token = self._download_json(
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
'Downloading token JSON metadata',
diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py
index 51f9eb7..da06cc3 100644
--- a/yt_dlp/extractor/boxcast.py
+++ b/yt_dlp/extractor/boxcast.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- js_to_json,
- traverse_obj,
- unified_timestamp
-)
+from ..utils import js_to_json, traverse_obj, unified_timestamp
class BoxCastVideoIE(InfoExtractor):
diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py
index 1200437..04b1dd8 100644
--- a/yt_dlp/extractor/brainpop.py
+++ b/yt_dlp/extractor/brainpop.py
@@ -6,7 +6,7 @@ from ..utils import (
classproperty,
int_or_none,
traverse_obj,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 61b1841..4190e1a 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -12,10 +12,11 @@ from ..compat import (
)
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
+ UnsupportedError,
clean_html,
dict_get,
extract_attributes,
- ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
@@ -29,7 +30,6 @@ from ..utils import (
try_get,
unescapeHTML,
unsmuggle_url,
- UnsupportedError,
update_url_query,
url_or_none,
)
diff --git a/yt_dlp/extractor/brilliantpala.py b/yt_dlp/extractor/brilliantpala.py
index 0bf8622..950a70a 100644
--- a/yt_dlp/extractor/brilliantpala.py
+++ b/yt_dlp/extractor/brilliantpala.py
@@ -27,8 +27,17 @@ class BrilliantpalaBaseIE(InfoExtractor):
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
def _perform_login(self, username, password):
- login_form = self._hidden_inputs(self._download_webpage(
- self._LOGIN_API, None, 'Downloading login page'))
+ login_page, urlh = self._download_webpage_handle(
+ self._LOGIN_API, None, 'Downloading login page', expected_status=401)
+ if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API):
+ self.write_debug('Cookies are valid, no login required.')
+ return
+
+ if urlh.status == 401:
+ self.write_debug('Got HTTP Error 401; cookies have been invalidated')
+ login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
login_form.update({
'username': username,
'password': password,
diff --git a/yt_dlp/extractor/bundestag.py b/yt_dlp/extractor/bundestag.py
index 9fd7c7d..71f7726 100644
--- a/yt_dlp/extractor/bundestag.py
+++ b/yt_dlp/extractor/bundestag.py
@@ -1,5 +1,5 @@
+import functools
import re
-from functools import partial
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
@@ -115,9 +115,9 @@ class BundestagIE(InfoExtractor):
note='Downloading metadata overlay', fatal=False,
), {
'title': (
- {partial(get_element_text_and_html_by_tag, 'h3')}, 0,
- {partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
- 'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
+ {functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
+ {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
+ 'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
}))
return result
diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py
deleted file mode 100644
index 4a22141..0000000
--- a/yt_dlp/extractor/cableav.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from .common import InfoExtractor
-
-
-class CableAVIE(InfoExtractor):
- _VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
- _TESTS = [{
- 'url': 'https://cableav.tv/lS4iR9lWjN8/',
- 'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
- 'info_dict': {
- 'id': 'lS4iR9lWjN8',
- 'ext': 'mp4',
- 'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
- 'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._og_search_video_url(webpage, secure=False)
-
- formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'formats': formats,
- }
diff --git a/yt_dlp/extractor/caffeinetv.py b/yt_dlp/extractor/caffeinetv.py
new file mode 100644
index 0000000..aa107f8
--- /dev/null
+++ b/yt_dlp/extractor/caffeinetv.py
@@ -0,0 +1,74 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ urljoin,
+)
+
+
+class CaffeineTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
+ _TESTS = [{
+ 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
+ 'info_dict': {
+ 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
+ 'ext': 'mp4',
+ 'title': 'GOOOOD MORNINNNNN #highlights',
+ 'timestamp': 1654702180,
+ 'upload_date': '20220608',
+ 'uploader': 'RahJON Wicc',
+ 'uploader_id': 'TsuSurf',
+ 'duration': 3145,
+ 'age_limit': 17,
+ 'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'tags': ['highlights', 'battlerap'],
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
+ broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {}
+
+ video_url = broadcast_info['video_url']
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
+ else:
+ formats = [{'url': video_url}]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(json_data, {
+ 'like_count': ('like_count', {int_or_none}),
+ 'view_count': ('view_count', {int_or_none}),
+ 'comment_count': ('comment_count', {int_or_none}),
+ 'tags': ('tags', ..., {str}, {lambda x: x or None}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_id': (((None, 'user'), 'username'), {str}, any),
+ 'is_live': ('is_live', {bool}),
+ }),
+ **traverse_obj(broadcast_info, {
+ 'title': ('broadcast_title', {str}),
+ 'duration': ('content_duration', {int_or_none}),
+ 'timestamp': ('broadcast_start_time', {parse_iso8601}),
+ 'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
+ }),
+ 'age_limit': {
+ # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
+ 'FOUR_PLUS': 0,
+ 'NINE_PLUS': 9,
+ 'TWELVE_PLUS': 12,
+ 'SEVENTEEN_PLUS': 17,
+ }.get(broadcast_info.get('content_rating'), 17),
+ }
diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py
index df5ca58..745e695 100644
--- a/yt_dlp/extractor/canalalpha.py
+++ b/yt_dlp/extractor/canalalpha.py
@@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor):
'id': '24484',
'ext': 'mp4',
'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
- 'description': 'md5:3de3f151180684621e85be7c10e4e613',
+ 'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
'upload_date': '20211026',
'duration': 360,
@@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor):
'duration': 360,
},
'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
+ 'info_dict': {
+ 'id': '33500',
+ 'ext': 'mp4',
+ 'title': 'Encore des mesures d\'économie dans le Jura',
+ 'description': 'md5:938b5b556592f2d1b9ab150268082a80',
+ 'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
+ 'upload_date': '20240411',
+ 'duration': 105,
+ },
}]
def _real_extract(self, url):
- id = self._match_id(url)
- webpage = self._download_webpage(url, id)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
data_json = self._parse_json(self._search_regex(
r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
- webpage, 'data_json'), id)['1']['data']['data']
+ webpage, 'data_json'), video_id)['1']['data']['data']
manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
subtitles = {}
formats = [{
@@ -75,15 +86,17 @@ class CanalAlphaIE(InfoExtractor):
'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
} for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
if manifests.get('hls'):
- m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
- formats.extend(m3u8_frmts)
- subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ manifests['hls'], video_id, m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
if manifests.get('dash'):
- dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
- formats.extend(dash_frmts)
- subtitles = self._merge_subtitles(subtitles, dash_subs)
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ manifests['dash'], video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
return {
- 'id': id,
+ 'id': video_id,
'title': data_json.get('title').strip(),
'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
'thumbnail': data_json.get('poster'),
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index b5beb1e..a418026 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -151,7 +151,7 @@ class CBCIE(InfoExtractor):
class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player'
- _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
+ _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
@@ -166,8 +166,51 @@ class CBCPlayerIE(InfoExtractor):
},
'skip': 'Geo-restricted to Canada and no longer available',
}, {
+ 'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2657631896',
+ 'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
+ 'info_dict': {
+ 'id': '2657631896',
+ 'ext': 'mp3',
+ 'title': 'CBC Montreal is organizing its first ever community hackathon!',
+ 'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
+ 'timestamp': 1425704400,
+ 'upload_date': '20150307',
+ 'uploader': 'CBCC-NEW',
+ 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
+ 'chapters': [],
+ 'duration': 494.811,
+ 'categories': ['AudioMobile/All in a Weekend Montreal'],
+ 'tags': 'count:8',
+ 'location': 'Quebec',
+ 'series': 'All in a Weekend Montreal',
+ 'season': 'Season 2015',
+ 'season_number': 2015,
+ 'media_type': 'Excerpt',
+ },
+ }, {
+ 'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
+ 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
+ 'info_dict': {
+ 'id': '2164402062',
+ 'ext': 'mp4',
+ 'title': 'Cancer survivor four times over',
+ 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
+ 'timestamp': 1320410746,
+ 'upload_date': '20111104',
+ 'uploader': 'CBCC-NEW',
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
+ 'chapters': [],
+ 'duration': 186.867,
+ 'series': 'CBC News: Windsor at 6:00',
+ 'categories': ['News/Canada/Windsor'],
+ 'location': 'Windsor',
+ 'tags': ['cancer'],
+ 'creators': ['Allison Johnson'],
+ 'media_type': 'Excerpt',
+ },
+ }, {
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
- 'url': 'http://www.cbc.ca/player/play/2657631896',
+ 'url': 'https://www.cbc.ca/player/play/1.2985700',
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
'info_dict': {
'id': '2657631896',
@@ -189,7 +232,7 @@ class CBCPlayerIE(InfoExtractor):
'media_type': 'Excerpt',
},
}, {
- 'url': 'http://www.cbc.ca/player/play/2164402062',
+ 'url': 'https://www.cbc.ca/player/play/1.1711287',
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
'info_dict': {
'id': '2164402062',
@@ -206,38 +249,75 @@ class CBCPlayerIE(InfoExtractor):
'categories': ['News/Canada/Windsor'],
'location': 'Windsor',
'tags': ['cancer'],
- 'creator': 'Allison Johnson',
+ 'creators': ['Allison Johnson'],
'media_type': 'Excerpt',
},
}, {
# Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
- 'url': 'http://www.cbc.ca/player/play/2284799043667',
- 'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
+ 'url': 'https://www.cbc.ca/player/play/1.7159484',
+ 'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
'info_dict': {
- 'id': '2284799043667',
+ 'id': '2324213316001',
'ext': 'mp4',
- 'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
- 'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
- 'timestamp': 1700272800,
- 'duration': 2718.833,
+ 'title': 'The National | School boards sue social media giants',
+ 'description': 'md5:4b4db69322fa32186c3ce426da07402c',
+ 'timestamp': 1711681200,
+ 'duration': 2743.400,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
- 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
'uploader': 'CBCC-NEW',
'chapters': 'count:5',
- 'upload_date': '20231118',
+ 'upload_date': '20240329',
'categories': 'count:4',
'series': 'The National - Full Show',
'tags': 'count:1',
- 'creator': 'News',
+ 'creators': ['News'],
'location': 'Canada',
'media_type': 'Full Program',
},
+ }, {
+ 'url': 'https://www.cbc.ca/player/play/video/1.7194274',
+ 'md5': '188b96cf6bdcb2540e178a6caa957128',
+ 'info_dict': {
+ 'id': '2334524995812',
+ 'ext': 'mp4',
+ 'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
+ 'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
+ 'timestamp': 1714788791,
+ 'duration': 77.678,
+ 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
+ 'uploader': 'CBCC-NEW',
+ 'chapters': 'count:0',
+ 'upload_date': '20240504',
+ 'categories': 'count:3',
+ 'series': 'The National',
+ 'tags': 'count:15',
+ 'creators': ['encoder'],
+ 'location': 'Canada',
+ 'media_type': 'Excerpt',
+ },
+ }, {
+ 'url': 'cbcplayer:1.7159484',
+ 'only_matching': True,
+ }, {
+ 'url': 'cbcplayer:2164402062',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.cbc.ca/player/play/2657631896',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
+ if '.' in video_id:
+ webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
+ video_id = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage,
+ 'initial state', video_id)['video']['currentClip']['mediaId']
+
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py
index cf83021..aca9782 100644
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -5,14 +5,14 @@ from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
extract_attributes,
+ find_xpath_attr,
get_element_html_by_id,
int_or_none,
- find_xpath_attr,
smuggle_url,
- xpath_element,
- xpath_text,
update_url_query,
url_or_none,
+ xpath_element,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py
index 1157114..0a5a524 100644
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@@ -1,6 +1,6 @@
import base64
import codecs
-import datetime
+import datetime as dt
import hashlib
import hmac
import json
@@ -16,7 +16,6 @@ from ..utils import (
merge_dicts,
multipart_encode,
parse_duration,
- random_birthday,
traverse_obj,
try_call,
try_get,
@@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'crash404',
- 'view_count': int,
'average_rating': float,
'duration': 137,
'age_limit': 0,
+ 'upload_date': '20160220',
+ 'timestamp': 1455968218,
}
}, {
- # Age-restricted
- 'url': 'http://www.cda.pl/video/1273454c4',
+ # Age-restricted with vfilm redirection
+ 'url': 'https://www.cda.pl/video/8753244c4',
+ 'md5': 'd8eeb83d63611289507010d3df3bb8b3',
'info_dict': {
- 'id': '1273454c4',
+ 'id': '8753244c4',
'ext': 'mp4',
- 'title': 'Bronson (2008) napisy HD 1080p',
- 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+ 'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
+ 'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
'height': 1080,
- 'uploader': 'boniek61',
+ 'uploader': 'arhn eu',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 5554,
+ 'duration': 991,
'age_limit': 18,
- 'view_count': int,
'average_rating': float,
- },
+ 'timestamp': 1633888264,
+ 'upload_date': '20211010',
+ }
+ }, {
+ # Age-restricted without vfilm redirection
+ 'url': 'https://www.cda.pl/video/17028157b8',
+ 'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
+ 'info_dict': {
+ 'id': '17028157b8',
+ 'ext': 'mp4',
+ 'title': 'STENDUPY MICHAŁ OGIŃSKI',
+ 'description': 'md5:5851f3272bfc31f762d616040a1d609a',
+ 'height': 480,
+ 'uploader': 'oginski',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 18855,
+ 'age_limit': 18,
+ 'average_rating': float,
+ 'timestamp': 1699705901,
+ 'upload_date': '20231111',
+ }
}, {
'url': 'http://ebd.cda.pl/0x0/5749950c',
'only_matching': True,
}]
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
- form_data = random_birthday('rok', 'miesiac', 'dzien')
- form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
- data, content_type = multipart_encode(form_data)
+ data, content_type = multipart_encode({'age_confirm': ''})
return self._download_webpage(
- urljoin(url, '/a/validatebirth'), video_id, *args,
+ url, video_id, *args,
data=data, headers={
'Referer': url,
'Content-Type': content_type,
@@ -134,7 +152,7 @@ class CDAIE(InfoExtractor):
self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
- if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
+ if cached_bearer.get('valid_until', 0) > dt.datetime.now().timestamp() + 5:
self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
return
@@ -154,7 +172,7 @@ class CDAIE(InfoExtractor):
})
self.cache.store(self._BEARER_CACHE, username, {
'token': token_res['access_token'],
- 'valid_until': token_res['expires_in'] + datetime.datetime.now().timestamp(),
+ 'valid_until': token_res['expires_in'] + dt.datetime.now().timestamp(),
})
self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'
@@ -164,7 +182,7 @@ class CDAIE(InfoExtractor):
if 'Authorization' in self._API_HEADERS:
return self._api_extract(video_id)
else:
- return self._web_extract(video_id, url)
+ return self._web_extract(video_id)
def _api_extract(self, video_id):
meta = self._download_json(
@@ -197,9 +215,9 @@ class CDAIE(InfoExtractor):
'view_count': meta.get('views'),
}
- def _web_extract(self, video_id, url):
+ def _web_extract(self, video_id):
self._set_cookie('cda.pl', 'cda.player', 'html5')
- webpage = self._download_webpage(
+ webpage, urlh = self._download_webpage_handle(
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
@@ -209,10 +227,10 @@ class CDAIE(InfoExtractor):
self.raise_geo_restricted()
need_confirm_age = False
- if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
+ if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
webpage, 'birthday validate form', default=None):
webpage = self._download_age_confirm_page(
- url, video_id, note='Confirming age')
+ urlh.url, video_id, note='Confirming age')
need_confirm_age = True
formats = []
@@ -222,9 +240,6 @@ class CDAIE(InfoExtractor):
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
''', webpage, 'uploader', default=None, group='uploader')
- view_count = self._search_regex(
- r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
- 'view_count', default=None)
average_rating = self._search_regex(
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
@@ -235,7 +250,6 @@ class CDAIE(InfoExtractor):
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'uploader': uploader,
- 'view_count': int_or_none(view_count),
'average_rating': float_or_none(average_rating),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index 156b6a3..5d63357 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -101,7 +101,7 @@ class CeskaTelevizeIE(InfoExtractor):
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title:
- playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
+ playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0]
playlist_description = self._og_search_description(webpage, default=None)
if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ')
diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py
index 9cffa11..745b71f 100644
--- a/yt_dlp/extractor/cinetecamilano.py
+++ b/yt_dlp/extractor/cinetecamilano.py
@@ -1,4 +1,5 @@
import json
+
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py
index 006a713..67b56e0 100644
--- a/yt_dlp/extractor/clippit.py
+++ b/yt_dlp/extractor/clippit.py
@@ -1,11 +1,11 @@
+import re
+
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
qualities,
)
-import re
-
class ClippitIE(InfoExtractor):
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index e776cca..1d2c443 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,5 +1,6 @@
import base64
import collections
+import functools
import getpass
import hashlib
import http.client
@@ -21,7 +22,6 @@ import urllib.parse
import urllib.request
import xml.etree.ElementTree
-from ..compat import functools # isort: split
from ..compat import (
compat_etree_fromstring,
compat_expanduser,
@@ -37,6 +37,7 @@ from ..networking.exceptions import (
IncompleteRead,
network_exceptions,
)
+from ..networking.impersonate import ImpersonateTarget
from ..utils import (
IDENTITY,
JSON_LD_RE,
@@ -170,12 +171,12 @@ class InfoExtractor:
Automatically calculated from width and height
* dynamic_range The dynamic range of the video. One of:
"SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
- * tbr Average bitrate of audio and video in KBit/s
- * abr Average audio bitrate in KBit/s
+ * tbr Average bitrate of audio and video in kbps (1000 bits/sec)
+ * abr Average audio bitrate in kbps (1000 bits/sec)
* acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz
* audio_channels Number of audio channels
- * vbr Average video bitrate in KBit/s
+ * vbr Average video bitrate in kbps (1000 bits/sec)
* fps Frame rate
* vcodec Name of the video codec in use
* container Name of the container format
@@ -246,7 +247,8 @@ class InfoExtractor:
* downloader_options A dictionary of downloader options
(For internal use only)
* http_chunk_size Chunk size for HTTP downloads
- * ffmpeg_args Extra arguments for ffmpeg downloader
+ * ffmpeg_args Extra arguments for ffmpeg downloader (input)
+ * ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url,
@@ -817,7 +819,7 @@ class InfoExtractor:
else:
return err.status in variadic(expected_status)
- def _create_request(self, url_or_request, data=None, headers=None, query=None):
+ def _create_request(self, url_or_request, data=None, headers=None, query=None, extensions=None):
if isinstance(url_or_request, urllib.request.Request):
self._downloader.deprecation_warning(
'Passing a urllib.request.Request to _create_request() is deprecated. '
@@ -826,10 +828,11 @@ class InfoExtractor:
elif not isinstance(url_or_request, Request):
url_or_request = Request(url_or_request)
- url_or_request.update(data=data, headers=headers, query=query)
+ url_or_request.update(data=data, headers=headers, query=query, extensions=extensions)
return url_or_request
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None,
+ headers=None, query=None, expected_status=None, impersonate=None, require_impersonation=False):
"""
Return the response handle.
@@ -860,8 +863,31 @@ class InfoExtractor:
headers = (headers or {}).copy()
headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
+ extensions = {}
+
+ if impersonate in (True, ''):
+ impersonate = ImpersonateTarget()
+ requested_targets = [
+ t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
+ for t in variadic(impersonate)
+ ] if impersonate else []
+
+ available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
+ if available_target:
+ extensions['impersonate'] = available_target
+ elif requested_targets:
+ message = 'The extractor is attempting impersonation, but '
+ message += (
+ 'no impersonate target is available' if not str(impersonate)
+ else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
+ info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
+ 'for information on installing the required dependencies')
+ if require_impersonation:
+ raise ExtractorError(f'{message}; {info_msg}', expected=True)
+ self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
+
try:
- return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
+ return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))
except network_exceptions as err:
if isinstance(err, HTTPError):
if self.__can_accept_status_code(err, expected_status):
@@ -880,13 +906,14 @@ class InfoExtractor:
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
- encoding=None, data=None, headers={}, query={}, expected_status=None):
+ encoding=None, data=None, headers={}, query={}, expected_status=None,
+ impersonate=None, require_impersonation=False):
"""
Return a tuple (page content as string, URL handle).
Arguments:
url_or_request -- plain text URL as a string or
- a urllib.request.Request object
+ a yt_dlp.networking.Request object
video_id -- Video/playlist/item identifier (string)
Keyword arguments:
@@ -911,17 +938,27 @@ class InfoExtractor:
returning True if it should be accepted
Note that this argument does not affect success status codes (2xx)
which are always accepted.
+ impersonate -- the impersonate target. Can be any of the following entities:
+ - an instance of yt_dlp.networking.impersonate.ImpersonateTarget
+ - a string in the format of CLIENT[:OS]
+ - a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
+ - a boolean value; True means any impersonate target is sufficient
+ require_impersonation -- flag to toggle whether the request should raise an error
+ if impersonation is not possible (bool, default: False)
"""
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, str):
url_or_request = url_or_request.partition('#')[0]
- urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+ urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data,
+ headers=headers, query=query, expected_status=expected_status,
+ impersonate=impersonate, require_impersonation=require_impersonation)
if urlh is False:
assert not fatal
return False
- content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
+ encoding=encoding, data=data)
return (content, urlh)
@staticmethod
@@ -969,8 +1006,10 @@ class InfoExtractor:
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
- def _request_dump_filename(self, url, video_id):
- basen = f'{video_id}_{url}'
+ def _request_dump_filename(self, url, video_id, data=None):
+ if data is not None:
+ data = hashlib.md5(data).hexdigest()
+ basen = join_nonempty(video_id, data, url, delim='_')
trim_length = self.get_param('trim_file_name') or 240
if len(basen) > trim_length:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@@ -992,7 +1031,8 @@ class InfoExtractor:
except LookupError:
return webpage_bytes.decode('utf-8', 'replace')
- def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
+ prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read()
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
@@ -1001,7 +1041,9 @@ class InfoExtractor:
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self.get_param('write_pages'):
- filename = self._request_dump_filename(urlh.url, video_id)
+ if isinstance(url_or_request, Request):
+ data = self._create_request(url_or_request, data).data
+ filename = self._request_dump_filename(urlh.url, video_id, data)
self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
@@ -1046,20 +1088,23 @@ class InfoExtractor:
return getattr(ie, parser)(content, *args, **kwargs)
def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+ impersonate=None, require_impersonation=False):
res = self._download_webpage_handle(
url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query, expected_status=expected_status)
+ data=data, headers=headers, query=query, expected_status=expected_status,
+ impersonate=impersonate, require_impersonation=require_impersonation)
if res is False:
return res
content, urlh = res
return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+ impersonate=None, require_impersonation=False):
if self.get_param('load_pages'):
url_or_request = self._create_request(url_or_request, data, headers, query)
- filename = self._request_dump_filename(url_or_request.url, video_id)
+ filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
self.to_screen(f'Loading request from {filename}')
try:
with open(filename, 'rb') as dumpf:
@@ -1079,6 +1124,8 @@ class InfoExtractor:
'headers': headers,
'query': query,
'expected_status': expected_status,
+ 'impersonate': impersonate,
+ 'require_impersonation': require_impersonation,
}
if parser is None:
kwargs.pop('transform_source')
@@ -1697,12 +1744,16 @@ class InfoExtractor:
traverse_json_ld(json_ld)
return filter_dict(info)
- def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
- return self._parse_json(
- self._search_regex(
- r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
- webpage, 'next.js data', fatal=fatal, **kw),
- video_id, transform_source=transform_source, fatal=fatal)
+ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+ if default == '{}':
+ self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+ default = {}
+ if default is not NO_DEFAULT:
+ fatal = False
+
+ return self._search_json(
+ r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+ video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
@@ -2400,7 +2451,7 @@ class InfoExtractor:
})
continue
- src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
+ src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src)
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
@@ -3333,23 +3384,16 @@ class InfoExtractor:
return formats
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
- mobj = re.search(
- r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
- webpage)
- if mobj:
- try:
- jwplayer_data = self._parse_json(mobj.group('options'),
- video_id=video_id,
- transform_source=transform_source)
- except ExtractorError:
- pass
- else:
- if isinstance(jwplayer_data, dict):
- return jwplayer_data
-
- def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+ return self._search_json(
+ r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
+ webpage, 'JWPlayer data', video_id,
+ # must be a {...} or sequence, ending
+ contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
+ transform_source=transform_source, default=None)
+
+ def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
jwplayer_data = self._find_jwplayer_data(
- webpage, video_id, transform_source=js_to_json)
+ webpage, video_id, transform_source=transform_source)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
@@ -3381,22 +3425,14 @@ class InfoExtractor:
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
subtitles = {}
- tracks = video_data.get('tracks')
- if tracks and isinstance(tracks, list):
- for track in tracks:
- if not isinstance(track, dict):
- continue
- track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, str):
- continue
- if track_kind.lower() not in ('captions', 'subtitles'):
- continue
- track_url = urljoin(base_url, track.get('file'))
- if not track_url:
- continue
- subtitles.setdefault(track.get('label') or 'en', []).append({
- 'url': self._proto_relative_url(track_url)
- })
+ for track in traverse_obj(video_data, (
+ 'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
entry = {
'id': this_video_id,
@@ -3481,7 +3517,7 @@ class InfoExtractor:
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
# of jwplayer.flash.swf
rtmp_url_parts = re.split(
- r'((?:mp4|mp3|flv):)', source_url, 1)
+ r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
if len(rtmp_url_parts) == 3:
rtmp_url, prefix, play_path = rtmp_url_parts
a_format.update({
diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py
index 1d3b61c..4514424 100644
--- a/yt_dlp/extractor/commonmistakes.py
+++ b/yt_dlp/extractor/commonmistakes.py
@@ -40,3 +40,19 @@ class UnicodeBOMIE(InfoExtractor):
'Your URL starts with a Byte Order Mark (BOM). '
'Removing the BOM and looking for "%s" ...' % real_url)
return self.url_result(real_url)
+
+
+class BlobIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'blob:'
+
+ _TESTS = [{
+ 'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ 'You\'ve asked yt-dlp to download a blob URL. '
+ 'A blob URL exists only locally in your browser. '
+ 'It is not possible for yt-dlp to access it.', expected=True)
diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py
index bcc34dd..0a98c98 100644
--- a/yt_dlp/extractor/corus.py
+++ b/yt_dlp/extractor/corus.py
@@ -1,7 +1,7 @@
from .theplatform import ThePlatformFeedIE
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
float_or_none,
int_or_none,
)
diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py
index 1ef90b5..0cb7d94 100644
--- a/yt_dlp/extractor/crackle.py
+++ b/yt_dlp/extractor/crackle.py
@@ -6,6 +6,7 @@ import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
determine_ext,
float_or_none,
int_or_none,
@@ -13,7 +14,6 @@ from ..utils import (
parse_age_limit,
parse_duration,
url_or_none,
- ExtractorError
)
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index 8d997de..ea54f01 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -1,18 +1,19 @@
import base64
+import uuid
from .common import InfoExtractor
+from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
float_or_none,
format_field,
int_or_none,
- join_nonempty,
+ jwt_decode_hs256,
parse_age_limit,
parse_count,
parse_iso8601,
qualities,
- remove_start,
time_seconds,
traverse_obj,
url_or_none,
@@ -24,10 +25,16 @@ class CrunchyrollBaseIE(InfoExtractor):
_BASE_URL = 'https://www.crunchyroll.com'
_API_BASE = 'https://api.crunchyroll.com'
_NETRC_MACHINE = 'crunchyroll'
+ _SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27'
+ _REFRESH_TOKEN = None
_AUTH_HEADERS = None
+ _AUTH_EXPIRY = None
_API_ENDPOINT = None
- _BASIC_AUTH = None
- _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
+ _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
+ 't-kdgp2h8c3jub8fn0fq',
+ 'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
+ )).encode()).decode()
+ _IS_PREMIUM = None
_LOCALE_LOOKUP = {
'ar': 'ar-SA',
'de': 'de-DE',
@@ -42,63 +49,78 @@ class CrunchyrollBaseIE(InfoExtractor):
'hi': 'hi-IN',
}
- @property
- def is_logged_in(self):
- return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
+ def _set_auth_info(self, response):
+ CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
+ CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
+ CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
+
+ def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
+ try:
+ return self._download_json(
+ f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
+ headers=headers, data=urlencode_postdata(data), impersonate=True)
+ except ExtractorError as error:
+ if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
+ raise
+ if target := error.cause.response.extensions.get('impersonate'):
+ raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
+ raise ExtractorError(
+ 'Request blocked by Cloudflare. '
+ 'Install the required impersonation dependency if possible, '
+ 'or else navigate to Crunchyroll in your browser, '
+ 'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+ 'and your browser\'s User-Agent (with --user-agent)', expected=True)
def _perform_login(self, username, password):
- if self.is_logged_in:
+ if not CrunchyrollBaseIE._REFRESH_TOKEN:
+ CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
return
- upsell_response = self._download_json(
- f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
- query={
- 'sess_id': 1,
- 'device_id': 'whatvalueshouldbeforweb',
- 'device_type': 'com.crunchyroll.static',
- 'access_token': 'giKq5eY27ny3cqz',
- 'referer': f'{self._BASE_URL}/welcome/login'
- })
- if upsell_response['code'] != 'ok':
- raise ExtractorError('Could not get session id')
- session_id = upsell_response['data']['session_id']
-
- login_response = self._download_json(
- f'{self._API_BASE}/login.1.json', None, 'Logging in',
- data=urlencode_postdata({
- 'account': username,
- 'password': password,
- 'session_id': session_id
- }))
- if login_response['code'] != 'ok':
- raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
- if not self.is_logged_in:
- raise ExtractorError('Login succeeded but did not set etp_rt cookie')
+ try:
+ login_response = self._request_token(
+ headers={'Authorization': self._BASIC_AUTH}, data={
+ 'username': username,
+ 'password': password,
+ 'grant_type': 'password',
+ 'scope': 'offline_access',
+ }, note='Logging in', errnote='Failed to log in')
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+ raise ExtractorError('Invalid username and/or password', expected=True)
+ raise
+
+ CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
+ self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
+ self._set_auth_info(login_response)
def _update_auth(self):
- if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
+ if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
return
- if not CrunchyrollBaseIE._BASIC_AUTH:
- cx_api_param = self._CLIENT_ID[self.is_logged_in]
- self.write_debug(f'Using cxApiParam={cx_api_param}')
- CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
- grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
+ auth_headers = {'Authorization': self._BASIC_AUTH}
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
+ data = {
+ 'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
+ 'grant_type': 'refresh_token',
+ 'scope': 'offline_access',
+ }
+ else:
+ data = {'grant_type': 'client_id'}
+ auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
try:
- auth_response = self._download_json(
- f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
- headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
+ auth_response = self._request_token(auth_headers, data)
except ExtractorError as error:
- if isinstance(error.cause, HTTPError) and error.cause.status == 403:
- raise ExtractorError(
- 'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
- 'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
- 'and your browser\'s User-Agent (with --user-agent)', expected=True)
- raise
+ username, password = self._get_login_info()
+ if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
+ raise
+ self.to_screen('Refresh token has expired. Re-logging in')
+ CrunchyrollBaseIE._REFRESH_TOKEN = None
+ self.cache.store(self._NETRC_MACHINE, username, None)
+ self._perform_login(username, password)
+ return
- CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
- CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+ self._set_auth_info(auth_response)
def _locale_from_language(self, language):
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
@@ -135,62 +157,91 @@ class CrunchyrollBaseIE(InfoExtractor):
raise ExtractorError(f'Unexpected response when downloading {note} JSON')
return result
- def _extract_formats(self, stream_response, display_id=None):
- requested_formats = self._configuration_arg('format') or ['adaptive_hls']
- available_formats = {}
- for stream_type, streams in traverse_obj(
- stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
- if stream_type not in requested_formats:
+ def _extract_chapters(self, internal_id):
+ # if no skip events are available, a 403 xml error is returned
+ skip_events = self._download_json(
+ f'https://static.crunchyroll.com/skip-events/production/{internal_id}.json',
+ internal_id, note='Downloading chapter info', fatal=False, errnote=False)
+ if not skip_events:
+ return None
+
+ chapters = []
+ for event in ('recap', 'intro', 'credits', 'preview'):
+ start = traverse_obj(skip_events, (event, 'start', {float_or_none}))
+ end = traverse_obj(skip_events, (event, 'end', {float_or_none}))
+ # some chapters have no start and/or ending time, they will just be ignored
+ if start is None or end is None:
continue
- for stream in traverse_obj(streams, lambda _, v: v['url']):
- hardsub_lang = stream.get('hardsub_locale') or ''
- format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
- available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
+ chapters.append({'title': event.capitalize(), 'start_time': start, 'end_time': end})
+
+ return chapters
+
+ def _extract_stream(self, identifier, display_id=None):
+ if not display_id:
+ display_id = identifier
+
+ self._update_auth()
+ headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT}
+ try:
+ stream_response = self._download_json(
+ f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
+ display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers)
+ except ExtractorError as error:
+ if self.get_param('ignore_no_formats_error'):
+ self.report_warning(error.orig_msg)
+ return [], {}
+ elif isinstance(error.cause, HTTPError) and error.cause.status == 420:
+ raise ExtractorError(
+ 'You have reached the rate-limit for active streams; try again later', expected=True)
+ raise
+
+ available_formats = {'': ('', '', stream_response['url'])}
+ for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
+ available_formats[hardsub_lang] = (f'hardsub-{hardsub_lang}', hardsub_lang, stream['url'])
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
- if '' in available_formats and 'all' not in requested_hardsubs:
+ hardsub_langs = [lang for lang in available_formats if lang]
+ if hardsub_langs and 'all' not in requested_hardsubs:
full_format_langs = set(requested_hardsubs)
+ self.to_screen(f'Available hardsub languages: {", ".join(hardsub_langs)}')
self.to_screen(
- 'To get all formats of a hardsub language, use '
+ 'To extract formats of a hardsub language, use '
'"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
only_once=True)
else:
full_format_langs = set(map(str.lower, available_formats))
- audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
+ audio_locale = traverse_obj(stream_response, ('audioLocale', {str}))
hardsub_preference = qualities(requested_hardsubs[::-1])
- formats = []
- for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
- if stream_type.endswith('hls'):
- if hardsub_lang.lower() in full_format_langs:
- adaptive_formats = self._extract_m3u8_formats(
- stream_url, display_id, 'mp4', m3u8_id=format_id,
- fatal=False, note=f'Downloading {format_id} HLS manifest')
- else:
- adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
- elif stream_type.endswith('dash'):
- adaptive_formats = self._extract_mpd_formats(
- stream_url, display_id, mpd_id=format_id,
- fatal=False, note=f'Downloading {format_id} MPD manifest')
+ formats, subtitles = [], {}
+ for format_id, hardsub_lang, stream_url in available_formats.values():
+ if hardsub_lang.lower() in full_format_langs:
+ adaptive_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
+ stream_url, display_id, mpd_id=format_id, headers=CrunchyrollBaseIE._AUTH_HEADERS,
+ fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
+ self._merge_subtitles(dash_subs, target=subtitles)
else:
- self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
- continue
+ continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = audio_locale
f['quality'] = hardsub_preference(hardsub_lang.lower())
formats.extend(adaptive_formats)
- return formats
+ for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
+ subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
- def _extract_subtitles(self, data):
- subtitles = {}
-
- for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
- subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
+ # Invalidate stream token to avoid rate-limit
+ error_msg = 'Unable to invalidate stream token; you may experience rate-limiting'
+ if stream_token := stream_response.get('token'):
+ self._request_webpage(Request(
+ f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive',
+ headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False)
+ else:
+ self.report_warning(error_msg)
- return subtitles
+ return formats, subtitles
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
@@ -245,7 +296,11 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
'like_count': int,
'dislike_count': int,
},
- 'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
+ 'params': {
+ 'skip_download': 'm3u8',
+ 'extractor_args': {'crunchyrollbeta': {'hardsub': ['de-DE']}},
+ 'format': 'bv[format_id~=hardsub]',
+ },
}, {
# Premium only
'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
@@ -306,6 +361,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
},
'params': {'skip_download': 'm3u8'},
+ 'skip': 'no longer exists',
}, {
'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
'info_dict': {
@@ -359,31 +415,16 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
else:
raise ExtractorError(f'Unknown object type {object_type}')
- # There might be multiple audio languages for one object (`<object>_metadata.versions`),
- # so we need to get the id from `streams_link` instead or we dont know which language to choose
- streams_link = response.get('streams_link')
- if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
+ if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
message = f'This {object_type} is for premium members only'
- if self.is_logged_in:
- raise ExtractorError(message, expected=True)
- self.raise_login_required(message)
-
- # We need go from unsigned to signed api to avoid getting soft banned
- stream_response = self._call_cms_api_signed(remove_start(
- streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
- result['formats'] = self._extract_formats(stream_response, internal_id)
- result['subtitles'] = self._extract_subtitles(stream_response)
-
- # if no intro chapter is available, a 403 without usable data is returned
- intro_chapter = self._download_json(
- f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
- internal_id, note='Downloading chapter info', fatal=False, errnote=False)
- if isinstance(intro_chapter, dict):
- result['chapters'] = [{
- 'title': 'Intro',
- 'start_time': float_or_none(intro_chapter.get('startTime')),
- 'end_time': float_or_none(intro_chapter.get('endTime')),
- }]
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
+ self.raise_no_formats(message, expected=True, video_id=internal_id)
+ else:
+ self.raise_login_required(message, method='password', metadata_available=True)
+ else:
+ result['formats'], result['subtitles'] = self._extract_stream(internal_id)
+
+ result['chapters'] = self._extract_chapters(internal_id)
def calculate_count(item):
return parse_count(''.join((item['displayed'], item.get('unit') or '')))
@@ -512,7 +553,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'display_id': 'egaono-hana',
'title': 'Egaono Hana',
'track': 'Egaono Hana',
- 'artist': 'Goose house',
+ 'artists': ['Goose house'],
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genres': ['J-Pop'],
},
@@ -525,11 +566,12 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'display_id': 'crossing-field',
'title': 'Crossing Field',
'track': 'Crossing Field',
- 'artist': 'LiSA',
+ 'artists': ['LiSA'],
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genres': ['Anime'],
},
'params': {'skip_download': 'm3u8'},
+ 'skip': 'no longer exists',
}, {
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
'info_dict': {
@@ -538,7 +580,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
- 'artist': 'LiSA',
+ 'artists': ['LiSA'],
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'description': 'md5:747444e7e6300907b7a43f0a0503072e',
'genres': ['J-Pop'],
@@ -566,16 +608,16 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
if not response:
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
- streams_link = response.get('streams_link')
- if not streams_link and response.get('isPremiumOnly'):
- message = f'This {response.get("type") or "media"} is for premium members only'
- if self.is_logged_in:
- raise ExtractorError(message, expected=True)
- self.raise_login_required(message)
-
result = self._transform_music_response(response)
- stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
- result['formats'] = self._extract_formats(stream_response, internal_id)
+
+ if not self._IS_PREMIUM and response.get('isPremiumOnly'):
+ message = f'This {response.get("type") or "media"} is for premium members only'
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
+ self.raise_no_formats(message, expected=True, video_id=internal_id)
+ else:
+ self.raise_login_required(message, method='password', metadata_available=True)
+ else:
+ result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
return result
@@ -587,7 +629,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'display_id': 'slug',
'title': 'title',
'track': 'title',
- 'artist': ('artist', 'name'),
+ 'artists': ('artist', 'name', all),
'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
'thumbnails': ('images', ..., ..., {
'url': ('source', {url_or_none}),
@@ -611,7 +653,7 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
'info_dict': {
'id': 'MA179CB50D',
'title': 'LiSA',
- 'genres': ['J-Pop', 'Anime', 'Rock'],
+ 'genres': ['Anime', 'J-Pop', 'Rock'],
'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
},
'playlist_mincount': 83,
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
index 0075680..e56584e 100644
--- a/yt_dlp/extractor/cspan.py
+++ b/yt_dlp/extractor/cspan.py
@@ -1,10 +1,12 @@
import re
from .common import InfoExtractor
+from .senategov import SenateISVPIE
+from .ustream import UstreamIE
from ..compat import compat_HTMLParseError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
extract_attributes,
find_xpath_attr,
get_element_by_attribute,
@@ -19,8 +21,6 @@ from ..utils import (
str_to_int,
unescapeHTML,
)
-from .senategov import SenateISVPIE
-from .ustream import UstreamIE
class CSpanIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py
index cec178f..1817bd2 100644
--- a/yt_dlp/extractor/ctsnews.py
+++ b/yt_dlp/extractor/ctsnews.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
-from ..utils import unified_timestamp
from .youtube import YoutubeIE
+from ..utils import unified_timestamp
class CtsNewsIE(InfoExtractor):
diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py
index 43401e1..4c25bea 100644
--- a/yt_dlp/extractor/dailymail.py
+++ b/yt_dlp/extractor/dailymail.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- int_or_none,
determine_protocol,
+ int_or_none,
try_get,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py
index 5e14d6a..2e0f6f0 100644
--- a/yt_dlp/extractor/damtomo.py
+++ b/yt_dlp/extractor/damtomo.py
@@ -1,8 +1,8 @@
import re
from .common import InfoExtractor
-from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
from ..compat import compat_str
+from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
class DamtomoBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py
new file mode 100644
index 0000000..50e4136
--- /dev/null
+++ b/yt_dlp/extractor/dangalplay.py
@@ -0,0 +1,197 @@
+import hashlib
+import json
+import re
+import time
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class DangalPlayBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'dangalplay'
+ _OTV_USER_ID = None
+ _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
+ _API_BASE = 'https://ottapi.dangalplay.com'
+ _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
+ _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above
+
+ def _perform_login(self, username, password):
+ if self._OTV_USER_ID:
+ return
+ if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password):
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
+ self._OTV_USER_ID = password
+
+ def _real_initialize(self):
+ if not self._OTV_USER_ID:
+ self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None)
+
+ def _extract_episode_info(self, metadata, episode_slug, series_slug):
+ return {
+ 'display_id': episode_slug,
+ 'episode_number': int_or_none(self._search_regex(
+ r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)),
+ 'season_number': int_or_none(self._search_regex(
+ r'season-(\d+)', series_slug, 'season number', default='1')),
+ 'series': series_slug,
+ **traverse_obj(metadata, {
+ 'id': ('content_id', {str}),
+ 'title': ('display_title', {str}),
+ 'episode': ('title', {str}),
+ 'series': ('show_name', {str}, {lambda x: x or None}),
+ 'series_id': ('catalog_id', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'release_timestamp': ('release_date_uts', {int_or_none}),
+ }),
+ }
+
+ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}):
+ return self._download_json(
+ f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
+ headers={'Accept': 'application/json'}, query={
+ 'auth_token': self._AUTH_TOKEN,
+ 'region': 'IN',
+ **query,
+ })
+
+
+class DangalPlayIE(DangalPlayBaseIE):
+ IE_NAME = 'dangalplay'
+ _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01',
+ 'info_dict': {
+ 'id': '647c61dc1e7171310dcd49b4',
+ 'ext': 'mp4',
+ 'release_timestamp': 1262304000,
+ 'episode_number': 1,
+ 'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
+ 'series': 'kitani-mohabbat-hai-season-2',
+ 'season_number': 2,
+ 'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
+ 'release_date': '20100101',
+ 'duration': 2325,
+ 'season': 'Season 2',
+ 'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01',
+ 'series_id': '645c9ea41e717158ca574966',
+ },
+ }, {
+ 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01',
+ 'info_dict': {
+ 'id': '65d31d9ba73b9c3abd14a7f3',
+ 'ext': 'mp4',
+ 'episode': 'EP 1 | MILKE BHI HUM NA MILE',
+ 'release_timestamp': 1708367411,
+ 'episode_number': 1,
+ 'season': 'Season 1',
+ 'title': 'EP 1 | MILKE BHI HUM NA MILE',
+ 'duration': 156048,
+ 'release_date': '20240219',
+ 'season_number': 1,
+ 'series': 'MILKE BHI HUM NA MILE',
+ 'series_id': '645c9ea41e717158ca574966',
+ 'display_id': 'milke-bhi-hum-na-mile-ep-number-01',
+ },
+ }]
+
+ def _generate_api_data(self, data):
+ catalog_id = data['catalog_id']
+ content_id = data['content_id']
+ timestamp = str(int(time.time()))
+ unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY))
+
+ return json.dumps({
+ 'catalog_id': catalog_id,
+ 'content_id': content_id,
+ 'category': '',
+ 'region': 'IN',
+ 'auth_token': self._AUTH_TOKEN,
+ 'id': self._OTV_USER_ID,
+ 'md5': hashlib.md5(unhashed.encode()).hexdigest(),
+ 'ts': timestamp,
+ }, separators=(',', ':')).encode()
+
+ def _real_extract(self, url):
+ series_slug, episode_slug = self._match_valid_url(url).group('series', 'id')
+ metadata = self._call_api(
+ f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip',
+ episode_slug, query={'item_language': ''})['data']
+
+ try:
+ details = self._download_json(
+ f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug,
+ 'Downloading playback details JSON', headers={
+ 'Accept': 'application/json',
+ 'Content-Type': 'application/json',
+ }, data=self._generate_api_data(metadata))['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 422:
+ error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
+ if error_info.get('code') == '1016':
+ self.raise_login_required(
+ f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
+ elif msg := error_info.get('message'):
+ raise ExtractorError(msg)
+ raise
+
+ m3u8_url = traverse_obj(details, (
+ ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any))
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4')
+
+ return {
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **self._extract_episode_info(metadata, episode_slug, series_slug),
+ }
+
+
+class DangalPlaySeasonIE(DangalPlayBaseIE):
+ IE_NAME = 'dangalplay:season'
+ _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1',
+ 'playlist_mincount': 170,
+ 'info_dict': {
+ 'id': 'kitani-mohabbat-hai-season-1',
+ },
+ }, {
+ 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes',
+ 'playlist_count': 30,
+ 'info_dict': {
+ 'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1',
+ },
+ }, {
+ # 1 season only, series page is season page
+ 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': 'milke-bhi-hum-na-mile',
+ },
+ }]
+
+ def _entries(self, subcategories, series_slug):
+ for subcategory in subcategories:
+ data = self._call_api(
+ f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip',
+ series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={
+ 'order_by': 'asc',
+ 'status': 'published',
+ })
+ for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])):
+ episode_slug = ep['friendly_id']
+ yield self.url_result(
+ f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}',
+ DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug))
+
+ def _real_extract(self, url):
+ series_slug, subcategory = self._match_valid_url(url).group('id', 'sub')
+ subcategories = [subcategory] if subcategory else traverse_obj(
+ self._call_api(
+ f'catalogs/shows/items/{series_slug}.gzip', series_slug,
+ 'Downloading season info JSON', query={'item_language': ''}),
+ ('data', 'subcategories', ..., 'friendly_id', {str}))
+
+ return self.playlist_result(
+ self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory))
diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py
index 1624d08..1774249 100644
--- a/yt_dlp/extractor/democracynow.py
+++ b/yt_dlp/extractor/democracynow.py
@@ -1,11 +1,11 @@
-import re
import os.path
+import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- url_basename,
remove_start,
+ url_basename,
)
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
index c11cd79..4380c41 100644
--- a/yt_dlp/extractor/digitalconcerthall.py
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
parse_resolution,
diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py
index 1f3d8e3..b2663a6 100644
--- a/yt_dlp/extractor/discoverygo.py
+++ b/yt_dlp/extractor/discoverygo.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
extract_attributes,
- ExtractorError,
int_or_none,
parse_age_limit,
remove_end,
diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py
index 430de32..d8dde0c 100644
--- a/yt_dlp/extractor/disney.py
+++ b/yt_dlp/extractor/disney.py
@@ -2,10 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- int_or_none,
- unified_strdate,
determine_ext,
+ int_or_none,
join_nonempty,
+ unified_strdate,
update_url_query,
)
diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py
index ee8893d..244ffdf 100644
--- a/yt_dlp/extractor/douyutv.py
+++ b/yt_dlp/extractor/douyutv.py
@@ -1,5 +1,5 @@
-import time
import hashlib
+import time
import urllib
import uuid
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 363b4be..ddf2128 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -4,8 +4,8 @@ import uuid
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
int_or_none,
remove_start,
@@ -355,12 +355,10 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
video_id, headers=headers, data=json.dumps({
'deviceInfo': {
'adBlocker': False,
+ 'drmSupported': False,
},
'videoId': video_id,
- 'wisteriaProperties': {
- 'platform': 'desktop',
- 'product': self._PRODUCT,
- },
+ 'wisteriaProperties': {},
}).encode('utf-8'))['data']['attributes']['streaming']
def _real_extract(self, url):
@@ -878,10 +876,31 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
})
-class DiscoveryNetworksDeIE(DPlayBaseIE):
+class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
_TESTS = [{
+ 'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
+ 'info_dict': {
+ 'id': '4756322',
+ 'ext': 'mp4',
+ 'title': 'German Gold',
+ 'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6',
+ 'display_id': 'goldrausch-in-australien/german-gold',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'season': 'Season 5',
+ 'season_number': 5,
+ 'series': 'Goldrausch in Australien',
+ 'duration': 2648.0,
+ 'upload_date': '20230517',
+ 'timestamp': 1684357500,
+ 'creators': ['DMAX'],
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
+ 'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
'info_dict': {
'id': '78867',
@@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
'season_number': 1,
'thumbnail': r're:https://.+\.jpg',
},
- 'params': {
- 'skip_download': True,
- },
+ 'skip': '404 Not Found',
}, {
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
'only_matching': True,
@@ -920,8 +937,14 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
return self._get_disco_api_info(
- url, '%s/%s' % (programme, alternate_id),
- 'sonic-eu1-prod.disco-api.com', realm, country)
+ url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
class DiscoveryPlusShowBaseIE(DPlayBaseIE):
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index bc2efce..0246975 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -65,12 +65,14 @@ class DropboxIE(InfoExtractor):
formats, subtitles, has_anonymous_download = [], {}, False
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
+ if not has_anonymous_download:
+ has_anonymous_download = self._search_regex(
+ r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
transcode_url = self._search_regex(
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
if not transcode_url:
continue
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
- has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
break
# downloads enabled we can get the original file
diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py
index e5dab6a..a9247ed 100644
--- a/yt_dlp/extractor/drtuber.py
+++ b/yt_dlp/extractor/drtuber.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- int_or_none,
NO_DEFAULT,
+ int_or_none,
parse_duration,
str_to_int,
)
diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py
index bb06c42..5ea014c 100644
--- a/yt_dlp/extractor/dtube.py
+++ b/yt_dlp/extractor/dtube.py
@@ -1,5 +1,5 @@
import json
-from socket import timeout
+import socket
from .common import InfoExtractor
from ..utils import (
@@ -56,7 +56,7 @@ class DTubeIE(InfoExtractor):
try:
self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
self._downloader._opener.open(video_url, timeout=5).close()
- except timeout:
+ except socket.timeout:
self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, format_id))
continue
diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py
index 626e577..adc7705 100644
--- a/yt_dlp/extractor/duboku.py
+++ b/yt_dlp/extractor/duboku.py
@@ -5,9 +5,9 @@ import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ ExtractorError,
clean_html,
extract_attributes,
- ExtractorError,
get_elements_by_class,
int_or_none,
js_to_json,
diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py
index e671433..e6660dc 100644
--- a/yt_dlp/extractor/dvtv.py
+++ b/yt_dlp/extractor/dvtv.py
@@ -2,15 +2,15 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
+ parse_iso8601,
try_get,
unescapeHTML,
- parse_iso8601,
)
diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py
index f7b8520..feab804 100644
--- a/yt_dlp/extractor/dw.py
+++ b/yt_dlp/extractor/dw.py
@@ -1,10 +1,10 @@
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
int_or_none,
unified_strdate,
url_or_none,
)
-from ..compat import compat_urlparse
class DWIE(InfoExtractor):
diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py
deleted file mode 100644
index 53bc253..0000000
--- a/yt_dlp/extractor/einthusan.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- extract_attributes,
- ExtractorError,
- get_elements_by_class,
- urlencode_postdata,
-)
-
-
-class EinthusanIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://einthusan.tv/movie/watch/9097/',
- 'md5': 'ff0f7f2065031b8a2cf13a933731c035',
- 'info_dict': {
- 'id': '9097',
- 'ext': 'mp4',
- 'title': 'Ae Dil Hai Mushkil',
- 'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }, {
- 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
- 'only_matching': True,
- }, {
- 'url': 'https://einthusan.com/movie/watch/9097/',
- 'only_matching': True,
- }, {
- 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
- 'only_matching': True,
- }]
-
- # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
- def _decrypt(self, encrypted_data, video_id):
- return self._parse_json(compat_b64decode((
- encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
- )).decode('utf-8'), video_id)
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- host = mobj.group('host')
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
-
- player_params = extract_attributes(self._search_regex(
- r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
-
- page_id = self._html_search_regex(
- '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
- video_data = self._download_json(
- 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
- data=urlencode_postdata({
- 'xEvent': 'UIVideoPlayer.PingOutcome',
- 'xJson': json.dumps({
- 'EJOutcomes': player_params['data-ejpingables'],
- 'NativeHLS': False
- }),
- 'arcVersion': 3,
- 'appVersion': 59,
- 'gorilla.csrf.Token': page_id,
- }))['Data']
-
- if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
- raise ExtractorError(
- 'Download rate reached. Please try again later.', expected=True)
-
- ej_links = self._decrypt(video_data['EJLinks'], video_id)
-
- formats = []
-
- m3u8_url = ej_links.get('HLSLink')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
-
- mp4_url = ej_links.get('MP4Link')
- if mp4_url:
- formats.append({
- 'url': mp4_url,
- })
-
- description = get_elements_by_class('synopsis', webpage)[0]
- thumbnail = self._html_search_regex(
- r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
- webpage, 'thumbnail url', fatal=False, group='url')
- if thumbnail is not None:
- thumbnail = compat_urlparse.urljoin(url, thumbnail)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': thumbnail,
- 'description': description,
- }
diff --git a/yt_dlp/extractor/eplus.py b/yt_dlp/extractor/eplus.py
index 88a8d5a..d2ad5b4 100644
--- a/yt_dlp/extractor/eplus.py
+++ b/yt_dlp/extractor/eplus.py
@@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
_TESTS = [{
- 'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
+ 'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
'info_dict': {
- 'id': '354502-0001-002',
- 'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
+ 'id': '335699-0001-006',
+ 'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
'live_status': 'was_live',
- 'release_date': '20211231',
- 'release_timestamp': 1640952000,
+ 'release_date': '20201221',
+ 'release_timestamp': 1608544800,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': [
+ 'This event may not be accessible',
+ 'No video formats found',
+ 'Requested format is not available',
+ ],
+ }, {
+ 'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
+ 'info_dict': {
+ 'id': '348021-0054-001',
+ 'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ~Starlines~【東京/DAY.1】',
+ 'live_status': 'was_live',
+ 'release_date': '20220115',
+ 'release_timestamp': 1642233600,
'description': str,
},
'params': {
@@ -124,6 +142,10 @@ class EplusIbIE(InfoExtractor):
if data_json.get('drm_mode') == 'ON':
self.report_drm(video_id)
+ if data_json.get('is_pass_ticket') == 'YES':
+ raise ExtractorError(
+ 'This URL is for a pass ticket instead of a player page', expected=True)
+
delivery_status = data_json.get('delivery_status')
archive_mode = data_json.get('archive_mode')
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py
index 9ecdf5d..19c6933 100644
--- a/yt_dlp/extractor/ertgr.py
+++ b/yt_dlp/extractor/ertgr.py
@@ -4,15 +4,15 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
dict_get,
int_or_none,
merge_dicts,
- parse_qs,
parse_age_limit,
parse_iso8601,
+ parse_qs,
str_or_none,
try_get,
url_or_none,
diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py
index 191a436..0cf889a 100644
--- a/yt_dlp/extractor/europa.py
+++ b/yt_dlp/extractor/europa.py
@@ -8,7 +8,7 @@ from ..utils import (
qualities,
traverse_obj,
unified_strdate,
- xpath_text
+ xpath_text,
)
@@ -94,13 +94,14 @@ class EuropaIE(InfoExtractor):
class EuroParlWebstreamIE(InfoExtractor):
_VALID_URL = r'''(?x)
- https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
- (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+ https?://multimedia\.europarl\.europa\.eu/
+ (?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
'''
_TESTS = [{
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
'info_dict': {
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
+ 'display_id': '20220914-0900-PLENARY',
'ext': 'mp4',
'title': 'Plenary session',
'release_timestamp': 1663139069,
@@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
'info_dict': {
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+ 'display_id': '20230301-1130-COMMITTEE-CULT',
'ext': 'mp4',
'release_date': '20230301',
'title': 'Committee on Culture and Education',
@@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
'live_status': 'is_live',
},
'skip': 'Not live anymore'
+ }, {
+ 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
+ 'info_dict': {
+ 'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
+ 'display_id': '20240320-1345-SPECIAL-PRESSER',
+ 'ext': 'mp4',
+ 'release_date': '20240320',
+ 'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
+ 'release_timestamp': 1710939767,
+ }
+ }, {
+ 'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -166,6 +181,7 @@ class EuroParlWebstreamIE(InfoExtractor):
return {
'id': json_info['id'],
+ 'display_id': display_id,
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
'formats': formats,
'subtitles': subtitles,
diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
index 65a1dc7..66fa42f 100644
--- a/yt_dlp/extractor/euscreen.py
+++ b/yt_dlp/extractor/euscreen.py
@@ -1,8 +1,7 @@
from .common import InfoExtractor
-
from ..utils import (
- parse_duration,
js_to_json,
+ parse_duration,
)
diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py
index d8b068e..4a13ab0 100644
--- a/yt_dlp/extractor/eyedotv.py
+++ b/yt_dlp/extractor/eyedotv.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..utils import (
- xpath_text,
- parse_duration,
ExtractorError,
+ parse_duration,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index 834b1df..b76407a 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats):
- dash_manifest = video.get('dash_manifest')
+ dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py
index cddf254..1e80f9a 100644
--- a/yt_dlp/extractor/fancode.py
+++ b/yt_dlp/extractor/fancode.py
@@ -1,12 +1,6 @@
from .common import InfoExtractor
-
from ..compat import compat_str
-from ..utils import (
- parse_iso8601,
- ExtractorError,
- try_get,
- mimetype2ext
-)
+from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get
class FancodeVodIE(InfoExtractor):
diff --git a/yt_dlp/extractor/fathom.py b/yt_dlp/extractor/fathom.py
new file mode 100644
index 0000000..1df7d96
--- /dev/null
+++ b/yt_dlp/extractor/fathom.py
@@ -0,0 +1,54 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ float_or_none,
+ get_element_html_by_id,
+ parse_iso8601,
+)
+from ..utils.traversal import traverse_obj
+
+
+class FathomIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fathom\.video/share/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://fathom.video/share/G9mkjkspnohVVZ_L5nrsoPycyWcB8y7s',
+ 'md5': '0decd5343b8f30ae268625e79a02b60f',
+ 'info_dict': {
+ 'id': '47200596',
+ 'ext': 'mp4',
+ 'title': 'eCom Inucbator - Coaching Session',
+ 'duration': 8125.380507,
+ 'timestamp': 1699048914,
+ 'upload_date': '20231103',
+ },
+ }, {
+ 'url': 'https://fathom.video/share/mEws3bybftHL2QLymxYEDeE21vtLxGVm',
+ 'md5': '4f5cb382126c22d1aba8a939f9c49690',
+ 'info_dict': {
+ 'id': '46812957',
+ 'ext': 'mp4',
+ 'title': 'Jon, Lawrence, Neman chat about practice',
+ 'duration': 3571.517847,
+ 'timestamp': 1698933600,
+ 'upload_date': '20231102',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ props = traverse_obj(
+ get_element_html_by_id('app', webpage), ({extract_attributes}, 'data-page', {json.loads}, 'props'))
+ video_id = str(props['call']['id'])
+
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(props['call']['video_url'], video_id, 'mp4'),
+ **traverse_obj(props, {
+ 'title': ('head', 'title', {str}),
+ 'duration': ('duration', {float_or_none}),
+ 'timestamp': ('call', 'started_at', {parse_iso8601}),
+ }),
+ }
diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py
index bca62ad..796bac3 100644
--- a/yt_dlp/extractor/faz.py
+++ b/yt_dlp/extractor/faz.py
@@ -3,9 +3,9 @@ import re
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..utils import (
+ int_or_none,
xpath_element,
xpath_text,
- int_or_none,
)
diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py
index 8175b6b..b2dbb92 100644
--- a/yt_dlp/extractor/fczenit.py
+++ b/yt_dlp/extractor/fczenit.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
)
diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py
index f604cbd..ae837f6 100644
--- a/yt_dlp/extractor/fifa.py
+++ b/yt_dlp/extractor/fifa.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py
index 0cd18f4..69ca87c 100644
--- a/yt_dlp/extractor/filmon.py
+++ b/yt_dlp/extractor/filmon.py
@@ -2,10 +2,10 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
+ int_or_none,
qualities,
strip_or_none,
- int_or_none,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py
index f9d22fd..c10d290 100644
--- a/yt_dlp/extractor/gab.py
+++ b/yt_dlp/extractor/gab.py
@@ -7,7 +7,7 @@ from ..utils import (
parse_codecs,
parse_duration,
str_to_int,
- unified_timestamp
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py
index 1d3c0b1..b284e1e 100644
--- a/yt_dlp/extractor/gamejolt.py
+++ b/yt_dlp/extractor/gamejolt.py
@@ -10,7 +10,7 @@ from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
- try_get
+ try_get,
)
diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py
index bc56b03..6403be8 100644
--- a/yt_dlp/extractor/gaskrank.py
+++ b/yt_dlp/extractor/gaskrank.py
@@ -1,4 +1,5 @@
import re
+
from .common import InfoExtractor
from ..utils import (
float_or_none,
diff --git a/yt_dlp/extractor/gbnews.py b/yt_dlp/extractor/gbnews.py
new file mode 100644
index 0000000..bb1554e
--- /dev/null
+++ b/yt_dlp/extractor/gbnews.py
@@ -0,0 +1,107 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ get_elements_html_by_class,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class GBNewsIE(InfoExtractor):
+ IE_DESC = 'GB News clips, features and live streams'
+ _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
+
+ _PLATFORM = 'safari'
+ _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
+ _TESTS = [{
+ 'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
+ 'info_dict': {
+ 'id': '52264136',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
+ 'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
+ 'description': 'The post was criticised by former employers of the broadcaster',
+ 'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
+ },
+ }, {
+ 'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
+ 'info_dict': {
+ 'id': '52328390',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
+ 'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
+ 'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family',
+ 'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'',
+ }
+ }, {
+ 'url': 'https://www.gbnews.uk/watchlive',
+ 'info_dict': {
+ 'id': '1069',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
+ 'display_id': 'watchlive',
+ 'live_status': 'is_live',
+ 'title': r're:^GB News Live',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ @functools.lru_cache
+ def _get_ss_endpoint(self, data_id, data_env):
+ if not data_id:
+ data_id = 'GB003'
+ if not data_env:
+ data_env = 'production'
+
+ json_data = self._download_json(
+ self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={
+ 'id': data_id,
+ 'env': data_env,
+ })
+ meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none}))
+ if not meta_url:
+ raise ExtractorError('No API host found')
+
+ return meta_url
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url).rpartition('/')[2]
+ webpage = self._download_webpage(url, display_id)
+
+ video_data = None
+ elements = get_elements_html_by_class('simplestream', webpage)
+ for html_tag in elements:
+ attributes = extract_attributes(html_tag)
+ if 'sidebar' not in (attributes.get('class') or ''):
+ video_data = attributes
+ if not video_data:
+ raise ExtractorError('Could not find video element', expected=True)
+
+ endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env'))
+
+ uvid = video_data['data-uvid']
+ video_type = video_data.get('data-type')
+ if not video_type or video_type == 'vod':
+ video_type = 'show'
+ stream_data = self._download_json(
+ f'{endpoint_url}/api/{video_type}/stream/{uvid}',
+ uvid, 'Downloading stream JSON', query={
+ 'key': video_data.get('data-key'),
+ 'platform': self._PLATFORM,
+ })
+ if traverse_obj(stream_data, 'drm'):
+ self.report_drm(uvid)
+
+ return {
+ 'id': uvid,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage, default=None),
+ 'description': self._og_search_description(webpage, default=None),
+ 'formats': self._extract_m3u8_formats(traverse_obj(stream_data, (
+ 'response', 'stream', {url_or_none})), uvid, 'mp4'),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'is_live': video_type == 'live',
+ }
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 9d82515..2818c71 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -4,7 +4,7 @@ import types
import urllib.parse
import xml.etree.ElementTree
-from .common import InfoExtractor # isort: split
+from .common import InfoExtractor
from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring
@@ -2105,22 +2105,6 @@ class GenericIE(InfoExtractor):
},
},
{
- 'note': 'JW Player embed with unicode-escape sequences in URL',
- 'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
- 'info_dict': {
- 'id': 'm',
- 'ext': 'mp4',
- 'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
- 'description': 'Mahler\'s ',
- 'uploader': 'www.medici.tv',
- 'age_limit': 0,
- 'thumbnail': r're:^https?://.+\.jpg',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
'md5': 'e2f0a4c329f7986280b7328e24036d60',
'info_dict': {
diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py
index 7795dc5..b9dc7c6 100644
--- a/yt_dlp/extractor/gettr.py
+++ b/yt_dlp/extractor/gettr.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- bool_or_none,
ExtractorError,
+ bool_or_none,
dict_get,
float_or_none,
int_or_none,
diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py
index c5bc86b..7baf8de 100644
--- a/yt_dlp/extractor/gigya.py
+++ b/yt_dlp/extractor/gigya.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
urlencode_postdata,
diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py
index 22aac0d..515f3c5 100644
--- a/yt_dlp/extractor/glomex.py
+++ b/yt_dlp/extractor/glomex.py
@@ -3,9 +3,9 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
extract_attributes,
- ExtractorError,
int_or_none,
parse_qs,
smuggle_url,
diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py
index b075a02..fba98d7 100644
--- a/yt_dlp/extractor/go.py
+++ b/yt_dlp/extractor/go.py
@@ -3,16 +3,16 @@ import re
from .adobepass import AdobePassIE
from ..compat import compat_str
from ..utils import (
- int_or_none,
+ ExtractorError,
determine_ext,
+ int_or_none,
parse_age_limit,
- remove_start,
remove_end,
+ remove_start,
+ traverse_obj,
try_get,
- urlencode_postdata,
- ExtractorError,
unified_timestamp,
- traverse_obj,
+ urlencode_postdata,
)
diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py
new file mode 100644
index 0000000..276a6c7
--- /dev/null
+++ b/yt_dlp/extractor/godresource.py
@@ -0,0 +1,79 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ str_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class GodResourceIE(InfoExtractor):
+ _VALID_URL = r'https?://new\.godresource\.com/video/(?P<id>\w+)'
+ _TESTS = [{
+ # hls stream
+ 'url': 'https://new.godresource.com/video/A01mTKjyf6w',
+ 'info_dict': {
+ 'id': 'A01mTKjyf6w',
+ 'ext': 'mp4',
+ 'view_count': int,
+ 'timestamp': 1710978666,
+ 'channel_id': '5',
+ 'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg',
+ 'channel': 'Stedfast Baptist Church',
+ 'upload_date': '20240320',
+ 'title': 'GodResource video #A01mTKjyf6w',
+ }
+ }, {
+ # mp4 link
+ 'url': 'https://new.godresource.com/video/01DXmBbQv_X',
+ 'md5': '0e8f72aa89a106b9d5c011ba6f8717b7',
+ 'info_dict': {
+ 'id': '01DXmBbQv_X',
+ 'ext': 'mp4',
+ 'channel_id': '12',
+ 'view_count': int,
+ 'timestamp': 1687996800,
+ 'thumbnail': 'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg',
+ 'channel': 'Documentaries',
+ 'title': 'The Sodomite Deception',
+ 'upload_date': '20230629',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ api_data = self._download_json(
+ f'https://api.godresource.com/api/Streams/{display_id}', display_id)
+
+ video_url = api_data['streamUrl']
+ is_live = api_data.get('isLive') or False
+ if (ext := determine_ext(video_url)) == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ video_url, display_id, live=is_live)
+ elif ext == 'mp4':
+ formats, subtitles = [{
+ 'url': video_url,
+ 'ext': ext
+ }], {}
+ else:
+ raise ExtractorError(f'Unexpected video format {ext}')
+
+ return {
+ 'id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': '',
+ 'is_live': is_live,
+ **traverse_obj(api_data, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('thumbnail', {url_or_none}),
+ 'view_count': ('views', {int}),
+ 'channel': ('channelName', {str}),
+ 'channel_id': ('channelId', {str_or_none}),
+ 'timestamp': ('streamDateCreated', {unified_timestamp}),
+ 'modified_timestamp': ('streamDataModified', {unified_timestamp})
+ })
+ }
diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py
index eb1dcf8..fac0884 100644
--- a/yt_dlp/extractor/gofile.py
+++ b/yt_dlp/extractor/gofile.py
@@ -1,10 +1,7 @@
import hashlib
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- try_get
-)
+from ..utils import ExtractorError, try_get
class GofileIE(InfoExtractor):
@@ -58,21 +55,18 @@ class GofileIE(InfoExtractor):
return
account_data = self._download_json(
- 'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
+ 'https://api.gofile.io/accounts', None, 'Getting a new guest account', data=b'{}')
self._TOKEN = account_data['data']['token']
self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
def _entries(self, file_id):
- query_params = {
- 'contentId': file_id,
- 'token': self._TOKEN,
- 'wt': '4fd6sg89d7s6', # From https://gofile.io/dist/js/alljs.js
- }
+ query_params = {'wt': '4fd6sg89d7s6'} # From https://gofile.io/dist/js/alljs.js
password = self.get_param('videopassword')
if password:
query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest()
files = self._download_json(
- 'https://api.gofile.io/getContent', file_id, note='Getting filelist', query=query_params)
+ f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist',
+ query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'})
status = files['status']
if status == 'error-passwordRequired':
@@ -82,7 +76,7 @@ class GofileIE(InfoExtractor):
raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True)
found_files = False
- for file in (try_get(files, lambda x: x['data']['contents'], dict) or {}).values():
+ for file in (try_get(files, lambda x: x['data']['children'], dict) or {}).values():
file_type, file_format = file.get('mimetype').split('/', 1)
if file_type not in ('video', 'audio') and file_format != 'vnd.mts':
continue
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index 06658dd..c19192c 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -1,9 +1,11 @@
import re
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..compat import compat_parse_qs
from ..utils import (
ExtractorError,
+ bug_reports_message,
determine_ext,
extract_attributes,
get_element_by_class,
@@ -39,6 +41,17 @@ class GoogleDriveIE(InfoExtractor):
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
}
}, {
+ # has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
+ 'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ 'md5': '322db8d63dd19788c04050a4bba67073',
+ 'info_dict': {
+ 'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ 'ext': 'mp3',
+ 'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
+ 'duration': 184,
+ 'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ },
+ }, {
# video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
@@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
'only_matching': True,
}]
_FORMATS_EXT = {
- '5': 'flv',
- '6': 'flv',
- '13': '3gp',
- '17': '3gp',
- '18': 'mp4',
- '22': 'mp4',
- '34': 'flv',
- '35': 'flv',
- '36': '3gp',
- '37': 'mp4',
- '38': 'mp4',
- '43': 'webm',
- '44': 'webm',
- '45': 'webm',
- '46': 'webm',
- '59': 'mp4',
+ **{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
+ '50': 'm4a',
}
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
_CAPTIONS_ENTRY_TAG = {
@@ -194,10 +193,13 @@ class GoogleDriveIE(InfoExtractor):
if len(fmt_stream_split) < 2:
continue
format_id, format_url = fmt_stream_split[:2]
+ ext = self._FORMATS_EXT.get(format_id)
+ if not ext:
+ self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
f = {
'url': lowercase_escape(format_url),
'format_id': format_id,
- 'ext': self._FORMATS_EXT[format_id],
+ 'ext': ext,
}
resolution = resolutions.get(format_id)
if resolution:
diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py
index 74aad11..7a98e0f 100644
--- a/yt_dlp/extractor/goplay.py
+++ b/yt_dlp/extractor/goplay.py
@@ -1,6 +1,6 @@
import base64
import binascii
-import datetime
+import datetime as dt
import hashlib
import hmac
import json
@@ -422,7 +422,7 @@ class AwsIdp:
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
- time_now = datetime.datetime.now(datetime.timezone.utc)
+ time_now = dt.datetime.now(dt.timezone.utc)
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
time_string = time_now.strftime(format_string)
return time_string
diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py
index 112293b..9c1a6cb 100644
--- a/yt_dlp/extractor/gotostage.py
+++ b/yt_dlp/extractor/gotostage.py
@@ -1,11 +1,8 @@
+import json
+
from .common import InfoExtractor
from ..compat import compat_str
-from ..utils import (
- try_get,
- url_or_none
-)
-
-import json
+from ..utils import try_get, url_or_none
class GoToStageIE(InfoExtractor):
diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py
index 530bdb7..2551cff 100644
--- a/yt_dlp/extractor/hbo.py
+++ b/yt_dlp/extractor/hbo.py
@@ -2,11 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
- xpath_text,
- xpath_element,
int_or_none,
parse_duration,
urljoin,
+ xpath_element,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py
index d1a400d..eb0a779 100644
--- a/yt_dlp/extractor/hearthisat.py
+++ b/yt_dlp/extractor/hearthisat.py
@@ -1,19 +1,20 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
KNOWN_EXTENSIONS,
+ determine_ext,
str_to_int,
)
class HearThisAtIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+ _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/?#]+)/(?P<title>[\w.-]+)'
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
_TESTS = [{
'url': 'https://hearthis.at/moofi/dr-kreep',
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
'info_dict': {
'id': '150939',
+ 'display_id': 'moofi - dr-kreep',
'ext': 'wav',
'title': 'Moofi - Dr. Kreep',
'thumbnail': r're:^https?://.*\.jpg$',
@@ -21,15 +22,16 @@ class HearThisAtIE(InfoExtractor):
'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
'upload_date': '20150118',
'view_count': int,
- 'duration': 71,
- 'genre': 'Experimental',
- }
+ 'duration': 70,
+ 'genres': ['Experimental'],
+ },
}, {
# 'download' link redirects to the original webpage
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
'md5': '5980ceb7c461605d30f1f039df160c6e',
'info_dict': {
'id': '811296',
+ 'display_id': 'twitchsf - dj-jim-hopkins-totally-bitchin-80s-dance-mix',
'ext': 'mp3',
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
'description': 'md5:ef26815ca8f483272a87b137ff175be2',
@@ -38,7 +40,39 @@ class HearThisAtIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'view_count': int,
'duration': 4360,
- 'genre': 'Dance',
+ 'genres': ['Dance'],
+ },
+ }, {
+ 'url': 'https://hearthis.at/tindalos/0001-tindalos-gnrique/eQd/',
+ 'md5': 'cd08e51911f147f6da2d9678905b0bd9',
+ 'info_dict': {
+ 'id': '2685222',
+ 'ext': 'mp3',
+ 'duration': 86,
+ 'view_count': int,
+ 'timestamp': 1545471670,
+ 'display_id': 'tindalos - 0001-tindalos-gnrique',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'genres': ['Other'],
+ 'title': 'Tindalos - Tindalos - générique n°1',
+ 'description': '',
+ 'upload_date': '20181222',
+ },
+ }, {
+ 'url': 'https://hearthis.at/sithi2/biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011/',
+ 'md5': 'b45ac60f0c8111eef6ddc10ec232e312',
+ 'info_dict': {
+ 'id': '7145959',
+ 'ext': 'mp3',
+ 'description': 'md5:d7ae36a453d78903f6b7ed6eb2fce1f2',
+ 'duration': 8986,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'md5:62669ce5b1b67f45c6f846033f37d3b9',
+ 'timestamp': 1588699409,
+ 'display_id': 'sithi2 - biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011',
+ 'view_count': int,
+ 'upload_date': '20200505',
+ 'genres': ['Other'],
},
}]
diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py
index e026996..099c2a1 100644
--- a/yt_dlp/extractor/hketv.py
+++ b/yt_dlp/extractor/hketv.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
merge_dicts,
parse_count,
diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py
index 57b76e4..41d50d0 100644
--- a/yt_dlp/extractor/hrti.py
+++ b/yt_dlp/extractor/hrti.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
parse_age_limit,
try_get,
diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py
index c4965f9..5379b54 100644
--- a/yt_dlp/extractor/huya.py
+++ b/yt_dlp/extractor/huya.py
@@ -2,8 +2,8 @@ import hashlib
import random
import re
-from ..compat import compat_urlparse, compat_b64decode
-
+from .common import InfoExtractor
+from ..compat import compat_b64decode, compat_urlparse
from ..utils import (
ExtractorError,
int_or_none,
@@ -13,8 +13,6 @@ from ..utils import (
update_url_query,
)
-from .common import InfoExtractor
-
class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
diff --git a/yt_dlp/extractor/hytale.py b/yt_dlp/extractor/hytale.py
index 0f4dcc3..e8cd21a 100644
--- a/yt_dlp/extractor/hytale.py
+++ b/yt_dlp/extractor/hytale.py
@@ -1,7 +1,8 @@
import re
+from .cloudflarestream import CloudflareStreamIE
from .common import InfoExtractor
-from ..utils import traverse_obj
+from ..utils.traversal import traverse_obj
class HytaleIE(InfoExtractor):
@@ -49,7 +50,7 @@ class HytaleIE(InfoExtractor):
entries = [
self.url_result(
f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
- title=self._titles.get(video_hash), url_transparent=True)
+ CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
for video_hash in re.findall(
r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
webpage)
diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py
index 9d55ddc..c28d09f 100644
--- a/yt_dlp/extractor/ichinanalive.py
+++ b/yt_dlp/extractor/ichinanalive.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
-from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
from ..compat import compat_str
+from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
class IchinanaLiveIE(InfoExtractor):
diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py
index 1fa0a2a..f32c116 100644
--- a/yt_dlp/extractor/imgur.py
+++ b/yt_dlp/extractor/imgur.py
@@ -76,6 +76,23 @@ class ImgurIE(ImgurBaseIE):
'thumbnail': 'https://i.imgur.com/jxBXAMCh.jpg',
'dislike_count': int,
},
+ }, {
+ # needs Accept header, ref: https://github.com/yt-dlp/yt-dlp/issues/9458
+ 'url': 'https://imgur.com/zV03bd5',
+ 'md5': '59df97884e8ba76143ff6b640a0e2904',
+ 'info_dict': {
+ 'id': 'zV03bd5',
+ 'ext': 'mp4',
+ 'title': 'Ive - Liz',
+ 'timestamp': 1710491255,
+ 'upload_date': '20240315',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'duration': 56.92,
+ 'comment_count': int,
+ 'release_timestamp': 1710491255,
+ 'release_date': '20240315',
+ },
}]
def _real_extract(self, url):
@@ -192,6 +209,7 @@ class ImgurIE(ImgurBaseIE):
'id': video_id,
'formats': formats,
'thumbnail': url_or_none(search('thumbnailUrl')),
+ 'http_headers': {'Accept': '*/*'},
}
diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py
index 192bcfe..2bb4850 100644
--- a/yt_dlp/extractor/infoq.py
+++ b/yt_dlp/extractor/infoq.py
@@ -1,3 +1,4 @@
+from .bokecc import BokeCCBaseIE
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
@@ -6,10 +7,9 @@ from ..compat import (
from ..utils import (
ExtractorError,
determine_ext,
- update_url_query,
traverse_obj,
+ update_url_query,
)
-from .bokecc import BokeCCBaseIE
class InfoQIE(BokeCCBaseIE):
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index f7f2150..46f9cd6 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -255,7 +255,7 @@ class InstagramIOSIE(InfoExtractor):
class InstagramIE(InstagramBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
@@ -379,6 +379,9 @@ class InstagramIE(InstagramBaseIE):
}, {
'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/reels/Cop84x6u7CP/',
+ 'only_matching': True,
}]
@classmethod
diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py
index f7aa579..d5a3d80 100644
--- a/yt_dlp/extractor/iprima.py
+++ b/yt_dlp/extractor/iprima.py
@@ -3,12 +3,12 @@ import time
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
js_to_json,
- urlencode_postdata,
- ExtractorError,
parse_qs,
- traverse_obj
+ traverse_obj,
+ urlencode_postdata,
)
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index 3368ab1..85ed549 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -4,20 +4,16 @@ import re
import time
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_unquote
-)
from .openload import PhantomJSwrapper
+from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode
from ..utils import (
+ ExtractorError,
clean_html,
decode_packed_codes,
- ExtractorError,
float_or_none,
format_field,
- get_element_by_id,
get_element_by_attribute,
+ get_element_by_id,
int_or_none,
js_to_json,
ohdave_rsa_encrypt,
diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py
index 713fd4e..5d6fbaa 100644
--- a/yt_dlp/extractor/itprotv.py
+++ b/yt_dlp/extractor/itprotv.py
@@ -1,12 +1,11 @@
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py
index 9ac7be3..55c4165 100644
--- a/yt_dlp/extractor/itv.py
+++ b/yt_dlp/extractor/itv.py
@@ -1,23 +1,22 @@
import json
-from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
-
+from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ JSON_LD_RE,
+ ExtractorError,
base_url,
clean_html,
determine_ext,
extract_attributes,
- ExtractorError,
get_element_by_class,
- JSON_LD_RE,
merge_dicts,
parse_duration,
smuggle_url,
try_get,
- url_or_none,
url_basename,
+ url_or_none,
urljoin,
)
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index e23fdfd..a11f3f1 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,9 +1,9 @@
import functools
-import urllib.parse
-import urllib.error
import hashlib
import json
import time
+import urllib.error
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py
deleted file mode 100644
index 71fed49..0000000
--- a/yt_dlp/extractor/jable.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- InAdvancePagedList,
- int_or_none,
- orderedSet,
- unified_strdate,
-)
-
-
-class JableIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
- _TESTS = [{
- 'url': 'https://jable.tv/videos/pppd-812/',
- 'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
- 'info_dict': {
- 'id': 'pppd-812',
- 'ext': 'mp4',
- 'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
- 'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- 'like_count': int,
- 'view_count': int,
- },
- }, {
- 'url': 'https://jable.tv/videos/apak-220/',
- 'md5': '71f9239d69ced58ab74a816908847cc1',
- 'info_dict': {
- 'id': 'apak-220',
- 'ext': 'mp4',
- 'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
- 'description': '',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- 'like_count': int,
- 'view_count': int,
- 'upload_date': '20220319',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- formats = self._extract_m3u8_formats(
- self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage, default=''),
- 'thumbnail': self._og_search_thumbnail(webpage, default=None),
- 'formats': formats,
- 'age_limit': 18,
- 'upload_date': unified_strdate(self._search_regex(
- r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
- 'view_count': int_or_none(self._search_regex(
- r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
- webpage, 'view_count', default='').replace(' ', '')),
- 'like_count': int_or_none(self._search_regex(
- r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)),
- }
-
-
-class JablePlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
- _TESTS = [{
- 'url': 'https://jable.tv/models/kaede-karen/',
- 'info_dict': {
- 'id': 'kaede-karen',
- 'title': '楓カレン',
- },
- 'playlist_count': 34,
- }, {
- 'url': 'https://jable.tv/categories/roleplay/',
- 'only_matching': True,
- }, {
- 'url': 'https://jable.tv/tags/girl/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
-
- def page_func(page_num):
- return [
- self.url_result(player_url, JableIE)
- for player_url in orderedSet(re.findall(
- r'href="(https://jable.tv/videos/[\w-]+/?)"',
- self._download_webpage(url, playlist_id, query={
- 'mode': 'async',
- 'from': page_num + 1,
- 'function': 'get_block',
- 'block_id': 'list_videos_common_videos_list',
- }, note=f'Downloading page {page_num + 1}')))]
-
- return self.playlist_result(
- InAdvancePagedList(page_func, int_or_none(self._search_regex(
- r'from:(\d+)">[^<]+\s*&raquo;', webpage, 'last page number', default=1)), 24),
- playlist_id, self._search_regex(
- r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py
index a2bbba3..8557a81 100644
--- a/yt_dlp/extractor/jamendo.py
+++ b/yt_dlp/extractor/jamendo.py
@@ -1,8 +1,8 @@
import hashlib
import random
-from ..compat import compat_str
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
int_or_none,
diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py
index 6c65056..19d2b92 100644
--- a/yt_dlp/extractor/japandiet.py
+++ b/yt_dlp/extractor/japandiet.py
@@ -1,5 +1,6 @@
import re
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
@@ -9,9 +10,8 @@ from ..utils import (
smuggle_url,
traverse_obj,
try_call,
- unsmuggle_url
+ unsmuggle_url,
)
-from .common import InfoExtractor
def _parse_japanese_date(text):
diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py
new file mode 100644
index 0000000..e7186d7
--- /dev/null
+++ b/yt_dlp/extractor/jiocinema.py
@@ -0,0 +1,403 @@
+import base64
+import itertools
+import json
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ jwt_decode_hs256,
+ parse_age_limit,
+ try_call,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class JioCinemaBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'jiocinema'
+ _GEO_BYPASS = False
+ _ACCESS_TOKEN = None
+ _REFRESH_TOKEN = None
+ _GUEST_TOKEN = None
+ _USER_ID = None
+ _DEVICE_ID = None
+ _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
+ _APP_NAME = {'appName': 'RJIL_JioCinema'}
+ _APP_VERSION = {'appVersion': '5.0.0'}
+ _API_SIGNATURES = 'o668nxgzwff'
+ _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
+ _ACCESS_HINT = 'the `accessToken` from your browser local storage'
+ _LOGIN_HINT = (
+ 'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
+ f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
+ 'If you have previously logged in with yt-dlp and your session '
+ 'has been cached, you can use "-u device -p <DEVICE_ID>"')
+
+ def _cache_token(self, token_type):
+ assert token_type in ('access', 'refresh', 'all')
+ if token_type in ('access', 'all'):
+ self.cache.store(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
+ if token_type in ('refresh', 'all'):
+ self.cache.store(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)
+
+ def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
+ return self._download_json(
+ url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ **self._API_HEADERS,
+ **headers,
+ }, expected_status=(400, 403, 474))
+
+ def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
+ return self._call_api(
+ f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
+ None, note=note, headers=headers, data=data)
+
+ def _refresh_token(self):
+ if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
+ raise ExtractorError('User token has expired', expected=True)
+ response = self._call_auth_api(
+ 'token', 'refreshtoken', 'Refreshing token',
+ headers={'accesstoken': self._ACCESS_TOKEN}, data={
+ **self._APP_NAME,
+ 'deviceId': self._DEVICE_ID,
+ 'refreshToken': self._REFRESH_TOKEN,
+ **self._APP_VERSION,
+ })
+ refresh_token = response.get('refreshTokenId')
+ if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
+ JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
+ self._cache_token('refresh')
+ JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
+ self._cache_token('access')
+
+ def _fetch_guest_token(self):
+ JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
+ guest_token = self._call_auth_api(
+ 'token', 'guest', 'Downloading guest token', data={
+ **self._APP_NAME,
+ 'deviceType': 'phone',
+ 'os': 'ios',
+ 'deviceId': self._DEVICE_ID,
+ 'freshLaunch': False,
+ 'adId': self._DEVICE_ID,
+ **self._APP_VERSION,
+ })
+ self._GUEST_TOKEN = guest_token['authToken']
+ self._USER_ID = guest_token['userId']
+
+ def _call_login_api(self, endpoint, guest_token, data, note):
+ return self._call_auth_api(
+ 'user', f'loginotp/{endpoint}', note, headers={
+ **self.geo_verification_headers(),
+ 'accesstoken': self._GUEST_TOKEN,
+ **self._APP_NAME,
+ **traverse_obj(guest_token, 'data', {
+ 'deviceType': ('deviceType', {str}),
+ 'os': ('os', {str}),
+ })}, data=data)
+
+ def _is_token_expired(self, token):
+ return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)
+
+ def _perform_login(self, username, password):
+ if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
+ return
+
+ UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
+
+ if username.lower() == 'token':
+ if try_call(lambda: jwt_decode_hs256(password)):
+ JioCinemaBaseIE._ACCESS_TOKEN = password
+ refresh_hint = 'the `refreshToken` UUID from your browser local storage'
+ refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
+ if not refresh_token:
+ self.to_screen(
+ 'To extend the life of your login session, in addition to your access token, '
+ 'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
+ f'where REFRESH_TOKEN is {refresh_hint}')
+ elif re.fullmatch(UUID_RE, refresh_token):
+ JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
+ else:
+ self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
+ else:
+ raise ExtractorError(
+ f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)
+
+ elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
+ JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
+ JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
+ if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
+ raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)
+
+ elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password):
+ self._fetch_guest_token()
+ guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
+ initial_data = {
+ 'number': base64.b64encode(password.encode()).decode(),
+ **self._APP_VERSION,
+ }
+ response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
+ if not traverse_obj(response, ('OTPInfo', {dict})):
+ raise ExtractorError('There was a problem with the phone number login attempt')
+
+ is_iphone = guest_token.get('os') == 'ios'
+ response = self._call_login_api('verify', guest_token, {
+ 'deviceInfo': {
+ 'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
+ 'info': {
+ 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
+ 'androidId': self._DEVICE_ID,
+ 'type': 'iOS' if is_iphone else 'Android'
+ }
+ },
+ **initial_data,
+ 'otp': self._get_tfa_info('the one-time password sent to your phone')
+ }, 'Submitting OTP')
+ if traverse_obj(response, 'code') == 1043:
+ raise ExtractorError('Wrong OTP', expected=True)
+ JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
+ JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
+
+ else:
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
+
+ user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
+ JioCinemaBaseIE._USER_ID = user_token['userId']
+ JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
+ if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device':
+ self._cache_token('all')
+ if self.get_param('cachedir') is not False:
+ self.to_screen(
+ f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
+ elif not JioCinemaBaseIE._REFRESH_TOKEN:
+ JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
+ if JioCinemaBaseIE._REFRESH_TOKEN:
+ self._cache_token('access')
+ self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
+ if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
+ self._refresh_token()
+
+
+class JioCinemaIE(JioCinemaBaseIE):
+ IE_NAME = 'jiocinema'
+ _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
+ 'info_dict': {
+ 'id': '3759931',
+ 'ext': 'mp4',
+ 'title': 'Pradeep to stop the wedding?',
+ 'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
+ 'episode': 'Pradeep to stop the wedding?',
+ 'episode_number': 89,
+ 'season': 'Agnisakshi…Ek Samjhauta-S1',
+ 'season_number': 1,
+ 'series': 'Agnisakshi Ek Samjhauta',
+ 'duration': 1238.0,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'age_limit': 13,
+ 'season_id': '3698031',
+ 'upload_date': '20230606',
+ 'timestamp': 1686009600,
+ 'release_date': '20230607',
+ 'genres': ['Drama'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
+ 'info_dict': {
+ 'id': '3754021',
+ 'ext': 'mp4',
+ 'title': 'Bhediya',
+ 'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
+ 'episode': 'Bhediya',
+ 'duration': 8500.0,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'age_limit': 13,
+ 'upload_date': '20230525',
+ 'timestamp': 1685026200,
+ 'release_date': '20230524',
+ 'genres': ['Comedy'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _extract_formats_and_subtitles(self, playback, video_id):
+ m3u8_url = traverse_obj(playback, (
+ 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
+ if not m3u8_url: # DRM-only content only serves dash urls
+ self.report_drm(video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
+ self._remove_duplicate_formats(formats)
+
+ return {
+ # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
+ 'formats': traverse_obj(formats, (
+ lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
+ self._fetch_guest_token()
+ elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
+ self._refresh_token()
+
+ playback = self._call_api(
+ f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
+ 'Downloading playback JSON', headers={
+ **self.geo_verification_headers(),
+ 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
+ **self._APP_NAME,
+ 'deviceid': self._DEVICE_ID,
+ 'uniqueid': self._USER_ID,
+ 'x-apisignatures': self._API_SIGNATURES,
+ 'x-platform': 'androidweb',
+ 'x-platform-token': 'web',
+ }, data={
+ '4k': False,
+ 'ageGroup': '18+',
+ 'appVersion': '3.4.0',
+ 'bitrateProfile': 'xhdpi',
+ 'capability': {
+ 'drmCapability': {
+ 'aesSupport': 'yes',
+ 'fairPlayDrmSupport': 'none',
+ 'playreadyDrmSupport': 'none',
+ 'widevineDRMSupport': 'none'
+ },
+ 'frameRateCapability': [{
+ 'frameRateSupport': '30fps',
+ 'videoQuality': '1440p'
+ }]
+ },
+ 'continueWatchingRequired': False,
+ 'dolby': False,
+ 'downloadRequest': False,
+ 'hevc': False,
+ 'kidsSafe': False,
+ 'manufacturer': 'Windows',
+ 'model': 'Windows',
+ 'multiAudioRequired': True,
+ 'osVersion': '10',
+ 'parentalPinValid': True,
+ 'x-apisignatures': self._API_SIGNATURES
+ })
+
+ status_code = traverse_obj(playback, ('code', {int}))
+ if status_code == 474:
+ self.raise_geo_restricted(countries=['IN'])
+ elif status_code == 1008:
+ error_msg = 'This content is only available for premium users'
+ if self._ACCESS_TOKEN:
+ raise ExtractorError(error_msg, expected=True)
+ self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
+ elif status_code == 400:
+ raise ExtractorError('The requested content is not available', expected=True)
+ elif status_code is not None and status_code != 200:
+ raise ExtractorError(
+ f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')
+
+ metadata = self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
+ video_id, fatal=False, query={
+ 'ids': f'include:{video_id}',
+ 'responseType': 'common',
+ 'devicePlatformType': 'desktop',
+ })
+
+ return {
+ 'id': video_id,
+ 'http_headers': self._API_HEADERS,
+ **self._extract_formats_and_subtitles(playback, video_id),
+ **traverse_obj(playback, ('data', {
+ # fallback metadata
+ 'title': ('name', {str}),
+ 'description': ('fullSynopsis', {str}),
+ 'series': ('show', 'name', {str}, {lambda x: x or None}),
+ 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
+ 'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
+ 'episode': ('fullTitle', {str}),
+ 'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
+ 'age_limit': ('ageNemonic', {parse_age_limit}),
+ 'duration': ('totalDuration', {float_or_none}),
+ 'thumbnail': ('images', {url_or_none}),
+ })),
+ **traverse_obj(metadata, ('result', 0, {
+ 'title': ('fullTitle', {str}),
+ 'description': ('fullSynopsis', {str}),
+ 'series': ('showName', {str}, {lambda x: x or None}),
+ 'season': ('seasonName', {str}, {lambda x: x or None}),
+ 'season_number': ('season', {int_or_none}),
+ 'season_id': ('seasonId', {str}, {lambda x: x or None}),
+ 'episode': ('fullTitle', {str}),
+ 'episode_number': ('episode', {int_or_none}),
+ 'timestamp': ('uploadTime', {int_or_none}),
+ 'release_date': ('telecastDate', {str}),
+ 'age_limit': ('ageNemonic', {parse_age_limit}),
+ 'duration': ('duration', {float_or_none}),
+ 'genres': ('genres', ..., {str}),
+ 'thumbnail': ('seo', 'ogImage', {url_or_none}),
+ })),
+ }
+
+
+class JioCinemaSeriesIE(JioCinemaBaseIE):
+ IE_NAME = 'jiocinema:series'
+ _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
+ 'info_dict': {
+ 'id': '3499917',
+ 'title': 'naagin',
+ },
+ 'playlist_mincount': 120,
+ }]
+
+ def _entries(self, series_id):
+ seasons = self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
+ 'Downloading series metadata JSON', query={
+ 'sort': 'season:asc',
+ 'id': series_id,
+ 'responseType': 'common',
+ })
+
+ for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
+ season_id = season['id']
+ label = season.get('season') or season_num
+ for page_num in itertools.count(1):
+ episodes = traverse_obj(self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
+ season_id, f'Downloading season {label} page {page_num} JSON', query={
+ 'sort': 'episode:asc',
+ 'id': season_id,
+ 'responseType': 'common',
+ 'page': page_num,
+ }), ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
+ if not episodes:
+ break
+ for episode in episodes:
+ yield self.url_result(
+ episode['slug'], JioCinemaIE, **traverse_obj(episode, {
+ 'video_id': 'id',
+ 'video_title': ('fullTitle', {str}),
+ 'season_number': ('season', {int_or_none}),
+ 'episode_number': ('episode', {int_or_none}),
+ }))
+
+ def _real_extract(self, url):
+ slug, series_id = self._match_valid_url(url).group('slug', 'id')
+ return self.playlist_result(self._entries(series_id), series_id, slug)
diff --git a/yt_dlp/extractor/jiosaavn.py b/yt_dlp/extractor/jiosaavn.py
index a592098..35fb3fd 100644
--- a/yt_dlp/extractor/jiosaavn.py
+++ b/yt_dlp/extractor/jiosaavn.py
@@ -1,89 +1,143 @@
+import functools
+import math
+import re
+
from .common import InfoExtractor
from ..utils import (
+ InAdvancePagedList,
+ clean_html,
int_or_none,
- js_to_json,
+ make_archive_id,
+ smuggle_url,
+ unsmuggle_url,
+ url_basename,
url_or_none,
urlencode_postdata,
- urljoin,
)
from ..utils.traversal import traverse_obj
class JioSaavnBaseIE(InfoExtractor):
- def _extract_initial_data(self, url, audio_id):
- webpage = self._download_webpage(url, audio_id)
- return self._search_json(
- r'window\.__INITIAL_DATA__\s*=', webpage,
- 'init json', audio_id, transform_source=js_to_json)
+ _API_URL = 'https://www.jiosaavn.com/api.php'
+ _VALID_BITRATES = {'16', '32', '64', '128', '320'}
+
+ @functools.cached_property
+ def requested_bitrates(self):
+ requested_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
+ if invalid_bitrates := set(requested_bitrates) - self._VALID_BITRATES:
+ raise ValueError(
+ f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
+ + f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
+ return requested_bitrates
+
+ def _extract_formats(self, song_data):
+ for bitrate in self.requested_bitrates:
+ media_data = self._download_json(
+ self._API_URL, song_data['id'],
+ f'Downloading format info for {bitrate}',
+ fatal=False, data=urlencode_postdata({
+ '__call': 'song.generateAuthToken',
+ '_format': 'json',
+ 'bitrate': bitrate,
+ 'url': song_data['encrypted_media_url'],
+ }))
+ if not traverse_obj(media_data, ('auth_url', {url_or_none})):
+ self.report_warning(f'Unable to extract format info for {bitrate}')
+ continue
+ ext = media_data.get('type')
+ yield {
+ 'url': media_data['auth_url'],
+ 'ext': 'm4a' if ext == 'mp4' else ext,
+ 'format_id': bitrate,
+ 'abr': int(bitrate),
+ 'vcodec': 'none',
+ }
+
+ def _extract_song(self, song_data, url=None):
+ info = traverse_obj(song_data, {
+ 'id': ('id', {str}),
+ 'title': ('song', {clean_html}),
+ 'album': ('album', {clean_html}),
+ 'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
+ 'duration': ('duration', {int_or_none}),
+ 'view_count': ('play_count', {int_or_none}),
+ 'release_year': ('year', {int_or_none}),
+ 'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
+ 'webpage_url': ('perma_url', {url_or_none}),
+ })
+ if webpage_url := info.get('webpage_url') or url:
+ info['display_id'] = url_basename(webpage_url)
+ info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
+
+ return info
+
+ def _call_api(self, type_, token, note='API', params={}):
+ return self._download_json(
+ self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
+ query={
+ '__call': 'webapi.get',
+ '_format': 'json',
+ '_marker': '0',
+ 'ctx': 'web6dot0',
+ 'token': token,
+ 'type': type_,
+ **params,
+ })
+
+ def _yield_songs(self, playlist_data):
+ for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
+ song_info = self._extract_song(song_data)
+ url = smuggle_url(song_info['webpage_url'], {
+ 'id': song_data['id'],
+ 'encrypted_media_url': song_data['encrypted_media_url'],
+ })
+ yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
class JioSaavnSongIE(JioSaavnBaseIE):
+ IE_NAME = 'jiosaavn:song'
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
'md5': '3b84396d15ed9e083c3106f1fa589c04',
'info_dict': {
- 'id': 'OQsEfQFVUXk',
- 'ext': 'mp4',
+ 'id': 'IcoLuefJ',
+ 'display_id': 'OQsEfQFVUXk',
+ 'ext': 'm4a',
'title': 'Leja Re',
'album': 'Leja Re',
- 'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
+ 'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
+ 'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
+ '_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
},
}, {
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
'only_matching': True,
}]
- _VALID_BITRATES = ('16', '32', '64', '128', '320')
-
def _real_extract(self, url):
- audio_id = self._match_id(url)
- extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
- if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
- raise ValueError(
- f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
- + f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
+ url, smuggled_data = unsmuggle_url(url)
+ song_data = traverse_obj(smuggled_data, ({
+ 'id': ('id', {str}),
+ 'encrypted_media_url': ('encrypted_media_url', {str}),
+ }))
- song_data = self._extract_initial_data(url, audio_id)['song']['song']
- formats = []
- for bitrate in extract_bitrates:
- media_data = self._download_json(
- 'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
- fatal=False, data=urlencode_postdata({
- '__call': 'song.generateAuthToken',
- '_format': 'json',
- 'bitrate': bitrate,
- 'url': song_data['encrypted_media_url'],
- }))
- if not media_data.get('auth_url'):
- self.report_warning(f'Unable to extract format info for {bitrate}')
- continue
- formats.append({
- 'url': media_data['auth_url'],
- 'ext': media_data.get('type'),
- 'format_id': bitrate,
- 'abr': int(bitrate),
- 'vcodec': 'none',
- })
+ if 'id' in song_data and 'encrypted_media_url' in song_data:
+ result = {'id': song_data['id']}
+ else:
+ # only extract metadata if this is not a url_transparent result
+ song_data = self._call_api('song', self._match_id(url))['songs'][0]
+ result = self._extract_song(song_data, url)
- return {
- 'id': audio_id,
- 'formats': formats,
- **traverse_obj(song_data, {
- 'title': ('title', 'text'),
- 'album': ('album', 'text'),
- 'thumbnail': ('image', 0, {url_or_none}),
- 'duration': ('duration', {int_or_none}),
- 'view_count': ('play_count', {int_or_none}),
- 'release_year': ('year', {int_or_none}),
- }),
- }
+ result['formats'] = list(self._extract_formats(song_data))
+ return result
class JioSaavnAlbumIE(JioSaavnBaseIE):
+ IE_NAME = 'jiosaavn:album'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
@@ -95,11 +149,46 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
}]
def _real_extract(self, url):
- album_id = self._match_id(url)
- album_view = self._extract_initial_data(url, album_id)['albumView']
-
- return self.playlist_from_matches(
- traverse_obj(album_view, (
- 'modules', lambda _, x: x['key'] == 'list', 'data', ..., 'title', 'action', {str})),
- album_id, traverse_obj(album_view, ('album', 'title', 'text', {str})), ie=JioSaavnSongIE,
- getter=lambda x: urljoin('https://www.jiosaavn.com/', x))
+ display_id = self._match_id(url)
+ album_data = self._call_api('album', display_id)
+
+ return self.playlist_result(
+ self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
+
+
+class JioSaavnPlaylistIE(JioSaavnBaseIE):
+ IE_NAME = 'jiosaavn:playlist'
+ _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
+ 'info_dict': {
+ 'id': 'LlJ8ZWT1ibN5084vKHRj2Q__',
+ 'title': 'Mood English',
+ },
+ 'playlist_mincount': 301,
+ }, {
+ 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-hindi/DVR,pFUOwyXqIp77B1JF,A__',
+ 'info_dict': {
+ 'id': 'DVR,pFUOwyXqIp77B1JF,A__',
+ 'title': 'Mood Hindi',
+ },
+ 'playlist_mincount': 801,
+ }]
+ _PAGE_SIZE = 50
+
+ def _fetch_page(self, token, page):
+ return self._call_api(
+ 'playlist', token, f'playlist page {page}', {'p': page, 'n': self._PAGE_SIZE})
+
+ def _entries(self, token, first_page_data, page):
+ page_data = first_page_data if not page else self._fetch_page(token, page + 1)
+ yield from self._yield_songs(page_data)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ playlist_data = self._fetch_page(display_id, 1)
+ total_pages = math.ceil(int(playlist_data['list_count']) / self._PAGE_SIZE)
+
+ return self.playlist_result(InAdvancePagedList(
+ functools.partial(self._entries, display_id, playlist_data),
+ total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
diff --git a/yt_dlp/extractor/joqrag.py b/yt_dlp/extractor/joqrag.py
index 3bb28af..7a91d4a 100644
--- a/yt_dlp/extractor/joqrag.py
+++ b/yt_dlp/extractor/joqrag.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
import urllib.parse
from .common import InfoExtractor
@@ -50,8 +50,8 @@ class JoqrAgIE(InfoExtractor):
def _extract_start_timestamp(self, video_id, is_live):
def extract_start_time_from(date_str):
- dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
- date = dt.strftime('%Y%m%d')
+ dt_ = datetime_from_str(date_str) + dt.timedelta(hours=9)
+ date = dt_.strftime('%Y%m%d')
start_time = self._search_regex(
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
self._download_webpage(
@@ -60,7 +60,7 @@ class JoqrAgIE(InfoExtractor):
errnote=f'Failed to download program list of {date}') or '',
'start time', default=None)
if start_time:
- return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
+ return unified_timestamp(f'{dt_.strftime("%Y/%m/%d")} {start_time} +09:00')
return None
start_timestamp = extract_start_time_from('today')
@@ -80,14 +80,14 @@ class JoqrAgIE(InfoExtractor):
note='Downloading metadata', errnote='Failed to download metadata')
title = self._extract_metadata('Program_name', metadata)
- if title == '放送休止':
+ if not title or title == '放送休止':
formats = []
live_status = 'is_upcoming'
release_timestamp = self._extract_start_timestamp(video_id, False)
msg = 'This stream is not currently live'
if release_timestamp:
msg += (' and will start at '
- + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
+ + dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
self.raise_no_formats(msg, expected=True)
else:
m3u8_path = self._search_regex(
diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py
index 245fe73..8069fea 100644
--- a/yt_dlp/extractor/jove.py
+++ b/yt_dlp/extractor/jove.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- unified_strdate
-)
+from ..utils import ExtractorError, unified_strdate
class JoveIE(InfoExtractor):
diff --git a/yt_dlp/extractor/jstream.py b/yt_dlp/extractor/jstream.py
index 3e2e627..00ac7cc 100644
--- a/yt_dlp/extractor/jstream.py
+++ b/yt_dlp/extractor/jstream.py
@@ -1,6 +1,6 @@
import base64
-import re
import json
+import re
from .common import InfoExtractor
from ..utils import (
diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py
index 43055e8..563aa2d 100644
--- a/yt_dlp/extractor/kakao.py
+++ b/yt_dlp/extractor/kakao.py
@@ -3,8 +3,8 @@ from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
- strip_or_none,
str_or_none,
+ strip_or_none,
traverse_obj,
unified_timestamp,
)
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index 95e2dee..4752d5a 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -4,18 +4,18 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_parse_qs,
+ compat_urlparse,
)
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
format_field,
int_or_none,
- unsmuggle_url,
+ remove_start,
smuggle_url,
traverse_obj,
- remove_start
+ unsmuggle_url,
)
diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py
index 8f247b3..3d74c74 100644
--- a/yt_dlp/extractor/kankanews.py
+++ b/yt_dlp/extractor/kankanews.py
@@ -1,7 +1,7 @@
-import time
+import hashlib
import random
import string
-import hashlib
+import time
import urllib.parse
from .common import InfoExtractor
diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py
index d124372..889548f 100644
--- a/yt_dlp/extractor/kick.py
+++ b/yt_dlp/extractor/kick.py
@@ -13,7 +13,8 @@ from ..utils import (
class KickBaseIE(InfoExtractor):
def _real_initialize(self):
- self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False)
+ self._request_webpage(
+ HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True)
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
if not xsrf_token:
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
@@ -25,7 +26,7 @@ class KickBaseIE(InfoExtractor):
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
return self._download_json(
f'https://kick.com/api/v1/{path}', display_id, note=note,
- headers=merge_dicts(headers, self._API_HEADERS), **kwargs)
+ headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
class KickIE(KickBaseIE):
@@ -82,26 +83,27 @@ class KickIE(KickBaseIE):
class KickVODIE(KickBaseIE):
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
- 'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35',
- 'md5': '73691206a6a49db25c5aa1588e6538fc',
+ 'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
+ 'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': {
- 'id': '54244b5e-050a-4df4-a013-b2433dafbe35',
+ 'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
'ext': 'mp4',
- 'title': 'Making 710-carBoosting. Kinda No Pixel inspired. !guilded - !links',
- 'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f',
- 'channel': 'kmack710',
- 'channel_id': '16278',
- 'uploader': 'Kmack710',
- 'uploader_id': '16412',
- 'upload_date': '20221206',
- 'timestamp': 1670318289,
- 'duration': 40104.0,
+ 'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
+ 'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
+ 'channel': 'jaredfps',
+ 'channel_id': '26608',
+ 'uploader': 'JaredFPS',
+ 'uploader_id': '26799',
+ 'upload_date': '20240402',
+ 'timestamp': 1712097108,
+ 'duration': 33859.0,
'thumbnail': r're:^https?://.*\.jpg',
- 'categories': ['Grand Theft Auto V'],
+ 'categories': ['Call of Duty: Warzone'],
},
'params': {
'skip_download': 'm3u8',
},
+ 'expected_warnings': [r'impersonation'],
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py
index 3c93ded..b776671 100644
--- a/yt_dlp/extractor/kuwo.py
+++ b/yt_dlp/extractor/kuwo.py
@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- get_element_by_id,
- clean_html,
ExtractorError,
InAdvancePagedList,
+ clean_html,
+ get_element_by_id,
remove_start,
)
diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py
index e7d2f8a..708cb54 100644
--- a/yt_dlp/extractor/lci.py
+++ b/yt_dlp/extractor/lci.py
@@ -1,9 +1,25 @@
from .common import InfoExtractor
+from .wat import WatIE
+from ..utils import ExtractorError, int_or_none
+from ..utils.traversal import traverse_obj
class LCIIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html'
_TESTS = [{
+ 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html',
+ 'info_dict': {
+ 'id': '14113788',
+ 'ext': 'mp4',
+ 'title': '24H Pujadas du vendredi 24 mai 2024',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg',
+ 'upload_date': '20240524',
+ 'duration': 6158,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
'info_dict': {
'id': '13875948',
@@ -24,5 +40,10 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
- return self.url_result('wat:' + wat_id, 'Wat', wat_id)
+ next_data = self._search_nextjs_data(webpage, video_id)
+ wat_id = traverse_obj(next_data, (
+ 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any))
+ if wat_id is None:
+ raise ExtractorError('Could not find wat_id')
+
+ return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id))
diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py
index 9846319..6287419 100644
--- a/yt_dlp/extractor/lcp.py
+++ b/yt_dlp/extractor/lcp.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .arkena import ArkenaIE
+from .common import InfoExtractor
class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE
diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py
index 10fb5d4..1a3ada1 100644
--- a/yt_dlp/extractor/lecture2go.py
+++ b/yt_dlp/extractor/lecture2go.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
determine_protocol,
- parse_duration,
int_or_none,
+ parse_duration,
)
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py
index 629d208..90f0268 100644
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
float_or_none,
int_or_none,
str_or_none,
diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py
index 85033b8..a113b3d 100644
--- a/yt_dlp/extractor/leeco.py
+++ b/yt_dlp/extractor/leeco.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
import hashlib
import re
import time
@@ -11,9 +11,9 @@ from ..compat import (
compat_urllib_parse_urlencode,
)
from ..utils import (
+ ExtractorError,
determine_ext,
encode_data_uri,
- ExtractorError,
int_or_none,
orderedSet,
parse_iso8601,
@@ -185,7 +185,7 @@ class LeIE(InfoExtractor):
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
- delimiter=' ', timezone=datetime.timedelta(hours=8))
+ delimiter=' ', timezone=dt.timedelta(hours=8))
description = self._html_search_meta('description', page, fatal=False)
return {
diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py
index b76ca09..2979939 100644
--- a/yt_dlp/extractor/libraryofcongress.py
+++ b/yt_dlp/extractor/libraryofcongress.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
determine_ext,
float_or_none,
diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py
index 919cfcb..ea150a5 100644
--- a/yt_dlp/extractor/lifenews.py
+++ b/yt_dlp/extractor/lifenews.py
@@ -6,8 +6,8 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_iso8601,
remove_end,
diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py
index 4e50f10..1ff091d 100644
--- a/yt_dlp/extractor/limelight.py
+++ b/yt_dlp/extractor/limelight.py
@@ -3,13 +3,13 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
determine_ext,
float_or_none,
int_or_none,
smuggle_url,
try_get,
unsmuggle_url,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index ad41c0e..2a7c6f0 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -1,4 +1,4 @@
-from itertools import zip_longest
+import itertools
import re
from .common import InfoExtractor
@@ -7,8 +7,8 @@ from ..utils import (
extract_attributes,
float_or_none,
int_or_none,
- srt_subtitles_timecode,
mimetype2ext,
+ srt_subtitles_timecode,
traverse_obj,
try_get,
url_or_none,
@@ -156,7 +156,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
def json2srt(self, transcript_lines, duration=None):
srt_data = ''
- for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
+ for line, (line_dict, next_dict) in enumerate(itertools.zip_longest(transcript_lines, transcript_lines[1:])):
start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),
diff --git a/yt_dlp/extractor/loom.py b/yt_dlp/extractor/loom.py
new file mode 100644
index 0000000..1191aa1
--- /dev/null
+++ b/yt_dlp/extractor/loom.py
@@ -0,0 +1,461 @@
+import json
+import textwrap
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ filter_dict,
+ get_first,
+ int_or_none,
+ parse_iso8601,
+ update_url,
+ url_or_none,
+ variadic,
+)
+from ..utils.traversal import traverse_obj
+
+
+class LoomIE(InfoExtractor):
+ IE_NAME = 'loom'
+ _VALID_URL = r'https?://(?:www\.)?loom\.com/(?:share|embed)/(?P<id>[\da-f]{32})'
+ _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
+ _TESTS = [{
+ # m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, json subs only
+ 'url': 'https://www.loom.com/share/43d05f362f734614a2e81b4694a3a523',
+ 'md5': 'bfc2d7e9c2e0eb4813212230794b6f42',
+ 'info_dict': {
+ 'id': '43d05f362f734614a2e81b4694a3a523',
+ 'ext': 'mp4',
+ 'title': 'A Ruler for Windows - 28 March 2022',
+ 'uploader': 'wILLIAM PIP',
+ 'upload_date': '20220328',
+ 'timestamp': 1648454238,
+ 'duration': 27,
+ },
+ }, {
+ # webm raw-url, mp4 transcoded-url, cdn url == transcoded-url, no subs
+ 'url': 'https://www.loom.com/share/c43a642f815f4378b6f80a889bb73d8d',
+ 'md5': '70f529317be8cf880fcc2c649a531900',
+ 'info_dict': {
+ 'id': 'c43a642f815f4378b6f80a889bb73d8d',
+ 'ext': 'webm',
+ 'title': 'Lilah Nielsen Intro Video',
+ 'uploader': 'Lilah Nielsen',
+ 'upload_date': '20200826',
+ 'timestamp': 1598480716,
+ 'duration': 20,
+ },
+ }, {
+ # m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, vtt sub and json subs
+ 'url': 'https://www.loom.com/share/9458bcbf79784162aa62ffb8dd66201b',
+ 'md5': '51737ec002969dd28344db4d60b9cbbb',
+ 'info_dict': {
+ 'id': '9458bcbf79784162aa62ffb8dd66201b',
+ 'ext': 'mp4',
+ 'title': 'Sharing screen with gpt-4',
+ 'description': 'Sharing screen with GPT 4 vision model and asking questions to guide through blender.',
+ 'uploader': 'Suneel Matham',
+ 'chapters': 'count:3',
+ 'upload_date': '20231109',
+ 'timestamp': 1699518978,
+ 'duration': 93,
+ },
+ }, {
+ # mpd raw-url, mp4 transcoded-url, cdn url == raw-url, no subs
+ 'url': 'https://www.loom.com/share/24351eb8b317420289b158e4b7e96ff2',
+ 'info_dict': {
+ 'id': '24351eb8b317420289b158e4b7e96ff2',
+ 'ext': 'webm',
+ 'title': 'OMFG clown',
+ 'description': 'md5:285c5ee9d62aa087b7e3271b08796815',
+ 'uploader': 'MrPumkin B',
+ 'upload_date': '20210924',
+ 'timestamp': 1632519618,
+ 'duration': 210,
+ },
+ 'params': {'skip_download': 'dash'},
+ }, {
+ # password-protected
+ 'url': 'https://www.loom.com/share/50e26e8aeb7940189dff5630f95ce1f4',
+ 'md5': '5cc7655e7d55d281d203f8ffd14771f7',
+ 'info_dict': {
+ 'id': '50e26e8aeb7940189dff5630f95ce1f4',
+ 'ext': 'mp4',
+ 'title': 'iOS Mobile Upload',
+ 'uploader': 'Simon Curran',
+ 'upload_date': '20200520',
+ 'timestamp': 1590000123,
+ 'duration': 35,
+ },
+ 'params': {'videopassword': 'seniorinfants2'},
+ }, {
+ # embed, transcoded-url endpoint sends empty JSON response
+ 'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
+ 'md5': '8488817242a0db1cb2ad0ea522553cf6',
+ 'info_dict': {
+ 'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
+ 'ext': 'mp4',
+ 'title': 'CF Reset User\'s Password',
+ 'uploader': 'Aimee Heintz',
+ 'upload_date': '20220707',
+ 'timestamp': 1657216459,
+ 'duration': 181,
+ },
+ 'expected_warnings': ['Failed to parse JSON'],
+ }]
+ _WEBPAGE_TESTS = [{
+ 'url': 'https://www.loom.com/community/e1229802a8694a09909e8ba0fbb6d073-pg',
+ 'md5': 'ec838cd01b576cf0386f32e1ae424609',
+ 'info_dict': {
+ 'id': 'e1229802a8694a09909e8ba0fbb6d073',
+ 'ext': 'mp4',
+ 'title': 'Rexie Jane Cimafranca - Founder\'s Presentation',
+ 'uploader': 'Rexie Cimafranca',
+ 'upload_date': '20230213',
+ 'duration': 247,
+ 'timestamp': 1676274030,
+ },
+ }]
+
+ _GRAPHQL_VARIABLES = {
+ 'GetVideoSource': {
+ 'acceptableMimes': ['DASH', 'M3U8', 'MP4'],
+ },
+ }
+ _GRAPHQL_QUERIES = {
+ 'GetVideoSSR': textwrap.dedent('''\
+ query GetVideoSSR($videoId: ID!, $password: String) {
+ getVideo(id: $videoId, password: $password) {
+ __typename
+ ... on PrivateVideo {
+ id
+ status
+ message
+ __typename
+ }
+ ... on VideoPasswordMissingOrIncorrect {
+ id
+ message
+ __typename
+ }
+ ... on RegularUserVideo {
+ id
+ __typename
+ createdAt
+ description
+ download_enabled
+ folder_id
+ is_protected
+ needs_password
+ owner {
+ display_name
+ __typename
+ }
+ privacy
+ s3_id
+ name
+ video_properties {
+ avgBitRate
+ client
+ camera_enabled
+ client_version
+ duration
+ durationMs
+ format
+ height
+ microphone_enabled
+ os
+ os_version
+ recordingClient
+ recording_type
+ recording_version
+ screen_type
+ tab_audio
+ trim_duration
+ width
+ __typename
+ }
+ playable_duration
+ source_duration
+ visibility
+ }
+ }
+ }\n'''),
+ 'GetVideoSource': textwrap.dedent('''\
+ query GetVideoSource($videoId: ID!, $password: String, $acceptableMimes: [CloudfrontVideoAcceptableMime]) {
+ getVideo(id: $videoId, password: $password) {
+ ... on RegularUserVideo {
+ id
+ nullableRawCdnUrl(acceptableMimes: $acceptableMimes, password: $password) {
+ url
+ __typename
+ }
+ __typename
+ }
+ __typename
+ }
+ }\n'''),
+ 'FetchVideoTranscript': textwrap.dedent('''\
+ query FetchVideoTranscript($videoId: ID!, $password: String) {
+ fetchVideoTranscript(videoId: $videoId, password: $password) {
+ ... on VideoTranscriptDetails {
+ id
+ video_id
+ source_url
+ captions_source_url
+ __typename
+ }
+ ... on GenericError {
+ message
+ __typename
+ }
+ __typename
+ }
+ }\n'''),
+ 'FetchChapters': textwrap.dedent('''\
+ query FetchChapters($videoId: ID!, $password: String) {
+ fetchVideoChapters(videoId: $videoId, password: $password) {
+ ... on VideoChapters {
+ video_id
+ content
+ __typename
+ }
+ ... on EmptyChaptersPayload {
+ content
+ __typename
+ }
+ ... on InvalidRequestWarning {
+ message
+ __typename
+ }
+ ... on Error {
+ message
+ __typename
+ }
+ __typename
+ }
+ }\n'''),
+ }
+ _APOLLO_GRAPHQL_VERSION = '0a1856c'
+
+ def _call_graphql_api(self, operations, video_id, note=None, errnote=None):
+ password = self.get_param('videopassword')
+ return self._download_json(
+ 'https://www.loom.com/graphql', video_id, note or 'Downloading GraphQL JSON',
+ errnote or 'Failed to download GraphQL JSON', headers={
+ 'Accept': 'application/json',
+ 'Content-Type': 'application/json',
+ 'x-loom-request-source': f'loom_web_{self._APOLLO_GRAPHQL_VERSION}',
+ 'apollographql-client-name': 'web',
+ 'apollographql-client-version': self._APOLLO_GRAPHQL_VERSION,
+ }, data=json.dumps([{
+ 'operationName': operation_name,
+ 'variables': {
+ 'videoId': video_id,
+ 'password': password,
+ **self._GRAPHQL_VARIABLES.get(operation_name, {}),
+ },
+ 'query': self._GRAPHQL_QUERIES[operation_name],
+ } for operation_name in variadic(operations)], separators=(',', ':')).encode())
+
+ def _call_url_api(self, endpoint, video_id):
+ response = self._download_json(
+ f'https://www.loom.com/api/campaigns/sessions/{video_id}/{endpoint}', video_id,
+ f'Downloading {endpoint} JSON', f'Failed to download {endpoint} JSON', fatal=False,
+ headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
+ data=json.dumps({
+ 'anonID': str(uuid.uuid4()),
+ 'deviceID': None,
+ 'force_original': False, # HTTP error 401 if True
+ 'password': self.get_param('videopassword'),
+ }, separators=(',', ':')).encode())
+ return traverse_obj(response, ('url', {url_or_none}))
+
+ def _extract_formats(self, video_id, metadata, gql_data):
+ formats = []
+ video_properties = traverse_obj(metadata, ('video_properties', {
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'acodec': ('microphone_enabled', {lambda x: 'none' if x is False else None}),
+ }))
+
+ def get_formats(format_url, format_id, quality):
+ if not format_url:
+ return
+ ext = determine_ext(format_url)
+ query = urllib.parse.urlparse(format_url).query
+
+ if ext == 'm3u8':
+ # Extract pre-merged HLS formats to avoid buggy parsing of metadata in split playlists
+ format_url = format_url.replace('-split.m3u8', '.m3u8')
+ m3u8_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
+ for fmt in m3u8_formats:
+ yield {
+ **fmt,
+ 'url': update_url(fmt['url'], query=query),
+ 'extra_param_to_segment_url': query,
+ }
+
+ elif ext == 'mpd':
+ dash_formats = self._extract_mpd_formats(
+ format_url, video_id, mpd_id=f'dash-{format_id}', fatal=False)
+ for fmt in dash_formats:
+ yield {
+ **fmt,
+ 'extra_param_to_segment_url': query,
+ 'quality': quality,
+ }
+
+ else:
+ yield {
+ 'url': format_url,
+ 'ext': ext,
+ 'format_id': f'http-{format_id}',
+ 'quality': quality,
+ **video_properties,
+ }
+
+ raw_url = self._call_url_api('raw-url', video_id)
+ formats.extend(get_formats(raw_url, 'raw', quality=1)) # original quality
+
+ transcoded_url = self._call_url_api('transcoded-url', video_id)
+ formats.extend(get_formats(transcoded_url, 'transcoded', quality=-1)) # transcoded quality
+
+ cdn_url = get_first(gql_data, ('data', 'getVideo', 'nullableRawCdnUrl', 'url', {url_or_none}))
+ # cdn_url is usually a dupe, but the raw-url/transcoded-url endpoints could return errors
+ valid_urls = [update_url(url, query=None) for url in (raw_url, transcoded_url) if url]
+ if cdn_url and update_url(cdn_url, query=None) not in valid_urls:
+ formats.extend(get_formats(cdn_url, 'cdn', quality=0)) # could be original or transcoded
+
+ return formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ metadata = get_first(
+ self._call_graphql_api('GetVideoSSR', video_id, 'Downloading GraphQL metadata JSON'),
+ ('data', 'getVideo', {dict})) or {}
+
+ if metadata.get('__typename') == 'VideoPasswordMissingOrIncorrect':
+ if not self.get_param('videopassword'):
+ raise ExtractorError(
+ 'This video is password-protected, use the --video-password option', expected=True)
+ raise ExtractorError('Invalid video password', expected=True)
+
+ gql_data = self._call_graphql_api(['FetchChapters', 'FetchVideoTranscript', 'GetVideoSource'], video_id)
+ duration = traverse_obj(metadata, ('video_properties', 'duration', {int_or_none}))
+
+ return {
+ 'id': video_id,
+ 'duration': duration,
+ 'chapters': self._extract_chapters_from_description(
+ get_first(gql_data, ('data', 'fetchVideoChapters', 'content', {str})), duration) or None,
+ 'formats': self._extract_formats(video_id, metadata, gql_data),
+ 'subtitles': filter_dict({
+ 'en': traverse_obj(gql_data, (
+ ..., 'data', 'fetchVideoTranscript',
+ ('source_url', 'captions_source_url'), {
+ 'url': {url_or_none},
+ })) or None,
+ }),
+ **traverse_obj(metadata, {
+ 'title': ('name', {str}),
+ 'description': ('description', {str}),
+ 'uploader': ('owner', 'display_name', {str}),
+ 'timestamp': ('createdAt', {parse_iso8601}),
+ }),
+ }
+
+
+class LoomFolderIE(InfoExtractor):
+ IE_NAME = 'loom:folder'
+ _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>[\da-f]{32})'
+ _TESTS = [{
+ # 2 subfolders, no videos in root
+ 'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c',
+ 'playlist_mincount': 16,
+ 'info_dict': {
+ 'id': '997db4db046f43e5912f10dc5f817b5c',
+ 'title': 'Blending Lessons',
+ },
+ }, {
+ # only videos, no subfolders
+ 'url': 'https://www.loom.com/share/folder/9a8a87f6b6f546d9a400c8e7575ff7f2',
+ 'playlist_mincount': 12,
+ 'info_dict': {
+ 'id': '9a8a87f6b6f546d9a400c8e7575ff7f2',
+ 'title': 'List A- a, i, o',
+ },
+ }, {
+ # videos in root and empty subfolder
+ 'url': 'https://www.loom.com/share/folder/886e534218c24fd292e97e9563078cc4',
+ 'playlist_mincount': 21,
+ 'info_dict': {
+ 'id': '886e534218c24fd292e97e9563078cc4',
+ 'title': 'Medicare Agent Training videos',
+ },
+ }, {
+ # videos in root and videos in subfolders
+ 'url': 'https://www.loom.com/share/folder/b72c4ecdf04745da9403926d80a40c38',
+ 'playlist_mincount': 21,
+ 'info_dict': {
+ 'id': 'b72c4ecdf04745da9403926d80a40c38',
+ 'title': 'Quick Altos Q & A Tutorials',
+ },
+ }, {
+ # recursive folder extraction
+ 'url': 'https://www.loom.com/share/folder/8b458a94e0e4449b8df9ea7a68fafc4e',
+ 'playlist_count': 23,
+ 'info_dict': {
+ 'id': '8b458a94e0e4449b8df9ea7a68fafc4e',
+ 'title': 'Sezer Texting Guide',
+ },
+ }, {
+ # more than 50 videos in 1 folder
+ 'url': 'https://www.loom.com/share/folder/e056a91d290d47ca9b00c9d1df56c463',
+ 'playlist_mincount': 61,
+ 'info_dict': {
+ 'id': 'e056a91d290d47ca9b00c9d1df56c463',
+ 'title': 'User Videos',
+ },
+ }, {
+ # many subfolders
+ 'url': 'https://www.loom.com/share/folder/c2dde8cc67454f0e99031677279d8954',
+ 'playlist_mincount': 75,
+ 'info_dict': {
+ 'id': 'c2dde8cc67454f0e99031677279d8954',
+ 'title': 'Honors 1',
+ },
+ }, {
+ 'url': 'https://www.loom.com/share/folder/bae17109a68146c7803454f2893c8cf8/Edpuzzle',
+ 'only_matching': True,
+ }]
+
+ def _extract_folder_data(self, folder_id):
+ return self._download_json(
+ f'https://www.loom.com/v1/folders/{folder_id}', folder_id,
+ 'Downloading folder info JSON', query={'limit': '10000'})
+
+ def _extract_folder_entries(self, folder_id, initial_folder_data=None):
+ folder_data = initial_folder_data or self._extract_folder_data(folder_id)
+
+ for video in traverse_obj(folder_data, ('videos', lambda _, v: v['id'])):
+ video_id = video['id']
+ yield self.url_result(
+ f'https://www.loom.com/share/{video_id}', LoomIE, video_id, video.get('name'))
+
+ # Recurse into subfolders
+ for subfolder_id in traverse_obj(folder_data, (
+ 'folders', lambda _, v: v['id'] != folder_id, 'id', {str})):
+ yield from self._extract_folder_entries(subfolder_id)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ playlist_data = self._extract_folder_data(playlist_id)
+
+ return self.playlist_result(
+ self._extract_folder_entries(playlist_id, playlist_data), playlist_id,
+ traverse_obj(playlist_data, ('folder', 'name', {str.strip})))
diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py
index fd9bba8..fa12a6a 100644
--- a/yt_dlp/extractor/mainstreaming.py
+++ b/yt_dlp/extractor/mainstreaming.py
@@ -1,14 +1,13 @@
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
js_to_json,
parse_duration,
traverse_obj,
try_get,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py
index 2792e6e..44c321c 100644
--- a/yt_dlp/extractor/manoto.py
+++ b/yt_dlp/extractor/manoto.py
@@ -1,10 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- traverse_obj
-)
-
+from ..utils import clean_html, int_or_none, traverse_obj
_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}'
diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py
index 716f1c9..c3c58d7 100644
--- a/yt_dlp/extractor/masters.py
+++ b/yt_dlp/extractor/masters.py
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
traverse_obj,
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
index 675ad8c..d040fb4 100644
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
- format_field,
float_or_none,
+ format_field,
int_or_none,
str_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
index fcc4827..c015977 100644
--- a/yt_dlp/extractor/mediaklikk.py
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -1,14 +1,11 @@
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
traverse_obj,
unified_strdate,
url_or_none,
)
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_str
-)
class MediaKlikkIE(InfoExtractor):
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
index e04a1ce..b7df5c7 100644
--- a/yt_dlp/extractor/mediaset.py
+++ b/yt_dlp/extractor/mediaset.py
@@ -5,11 +5,11 @@ from .theplatform import ThePlatformBaseIE
from ..utils import (
ExtractorError,
GeoRestrictedError,
- int_or_none,
OnDemandPagedList,
+ int_or_none,
try_get,
- urljoin,
update_url_query,
+ urljoin,
)
diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py
index 7ea78ab..d3fec4e 100644
--- a/yt_dlp/extractor/mediasite.py
+++ b/yt_dlp/extractor/mediasite.py
@@ -1,5 +1,5 @@
-import re
import json
+import re
from .common import InfoExtractor
from ..compat import (
@@ -10,16 +10,15 @@ from ..utils import (
ExtractorError,
float_or_none,
mimetype2ext,
+ smuggle_url,
str_or_none,
try_call,
try_get,
- smuggle_url,
unsmuggle_url,
url_or_none,
urljoin,
)
-
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
diff --git a/yt_dlp/extractor/medici.py b/yt_dlp/extractor/medici.py
index 328ccd2..b6235b6 100644
--- a/yt_dlp/extractor/medici.py
+++ b/yt_dlp/extractor/medici.py
@@ -1,67 +1,153 @@
+import urllib.parse
+
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
- update_url_query,
- urlencode_postdata,
+ filter_dict,
+ parse_iso8601,
+ traverse_obj,
+ try_call,
+ url_or_none,
)
class MediciIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
- _TEST = {
- 'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
- 'md5': '004c21bb0a57248085b6ff3fec72719d',
+ _VALID_URL = r'https?://(?:(?P<sub>www|edu)\.)?medici\.tv/[a-z]{2}/[\w.-]+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.medici.tv/en/operas/thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
+ 'md5': 'd483f74e7a7a9eac0dbe152ab189050d',
+ 'info_dict': {
+ 'id': '8032',
+ 'ext': 'mp4',
+ 'title': 'Thomas Adès\'s The Exterminating Angel',
+ 'description': 'md5:708ae6350dadc604225b4a6e32482bab',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'upload_date': '20240304',
+ 'timestamp': 1709561766,
+ 'display_id': 'thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
+ },
+ 'expected_warnings': [r'preview'],
+ }, {
+ 'url': 'https://edu.medici.tv/en/operas/wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
+ 'md5': '4ef3f4079a6e1c617584463a9eb84f99',
+ 'info_dict': {
+ 'id': '7900',
+ 'ext': 'mp4',
+ 'title': 'Wagner\'s Lohengrin',
+ 'description': 'md5:a384a62937866101f86902f21752cd89',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'upload_date': '20231017',
+ 'timestamp': 1697554771,
+ 'display_id': 'wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
+ },
+ 'expected_warnings': [r'preview'],
+ }, {
+ 'url': 'https://www.medici.tv/en/concerts/sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
+ 'md5': '9dd757e53b22b2511e85ea9ea60e4815',
+ 'info_dict': {
+ 'id': '5712',
+ 'ext': 'mp4',
+ 'title': 'Sergey Smbatyan conducts Tigran Mansurian — With Chouchane Siranossian and Mario Brunello',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'description': 'md5:9411fe44c874bb10e9af288c65816e41',
+ 'upload_date': '20200323',
+ 'timestamp': 1584975600,
+ 'display_id': 'sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
+ },
+ 'expected_warnings': [r'preview'],
+ }, {
+ 'url': 'https://www.medici.tv/en/ballets/carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
+ 'md5': '40f5e76cb701a97a6d7ba23b62c49990',
+ 'info_dict': {
+ 'id': '7857',
+ 'ext': 'mp4',
+ 'title': 'Carmen by Jiří Bubeníček after Roland Petit, music by Bizet, de Falla, Castelnuovo-Tedesco, and Bonolis',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'description': 'md5:0f15a15611ed748020c769873e10a8bb',
+ 'upload_date': '20240223',
+ 'timestamp': 1708707600,
+ 'display_id': 'carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
+ },
+ 'expected_warnings': [r'preview'],
+ }, {
+ 'url': 'https://www.medici.tv/en/documentaries/la-sonnambula-liege-2023-documentaire',
+ 'md5': '87ff198018ce79a34757ab0dd6f21080',
+ 'info_dict': {
+ 'id': '7513',
+ 'ext': 'mp4',
+ 'title': 'La Sonnambula',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'description': 'md5:0caf9109a860fd50cd018df062a67f34',
+ 'upload_date': '20231103',
+ 'timestamp': 1699010830,
+ 'display_id': 'la-sonnambula-liege-2023-documentaire',
+ },
+ 'expected_warnings': [r'preview'],
+ }, {
+ 'url': 'https://edu.medici.tv/en/masterclasses/yvonne-loriod-olivier-messiaen',
+ 'md5': 'fb5dcec46d76ad20fbdbaabb01da191d',
+ 'info_dict': {
+ 'id': '3024',
+ 'ext': 'mp4',
+ 'title': 'Olivier Messiaen and Yvonne Loriod, pianists and teachers',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'description': 'md5:aab948e2f7690214b5c28896c83f1fc1',
+ 'upload_date': '20150223',
+ 'timestamp': 1424706608,
+ 'display_id': 'yvonne-loriod-olivier-messiaen',
+ },
+ 'skip': 'Requires authentication; preview starts in the middle',
+ }, {
+ 'url': 'https://www.medici.tv/en/jazz/makaya-mccraven-la-rochelle',
+ 'md5': '4cc279a8b06609782747c8f50beea2b3',
'info_dict': {
- 'id': '3059',
- 'ext': 'flv',
- 'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
- 'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20170408',
+ 'id': '7922',
+ 'ext': 'mp4',
+ 'title': 'NEW: Makaya McCraven in La Rochelle',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'description': 'md5:b5a8aaeb6993d8ccb18bde8abb8aa8d2',
+ 'upload_date': '20231228',
+ 'timestamp': 1703754863,
+ 'display_id': 'makaya-mccraven-la-rochelle',
},
- }
+ 'expected_warnings': [r'preview'],
+ }]
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- # Sets csrftoken cookie
- self._download_webpage(url, video_id)
+ display_id, subdomain = self._match_valid_url(url).group('id', 'sub')
+ self._request_webpage(url, display_id, 'Requesting CSRF token cookie')
- MEDICI_URL = 'http://www.medici.tv/'
+ subdomain = 'edu-' if subdomain == 'edu' else ''
+ origin = f'https://{urllib.parse.urlparse(url).hostname}'
data = self._download_json(
- MEDICI_URL, video_id,
- data=urlencode_postdata({
- 'json': 'true',
- 'page': '/%s' % video_id,
- 'timezone_offset': -420,
- }), headers={
- 'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
- 'X-Requested-With': 'XMLHttpRequest',
- 'Referer': MEDICI_URL,
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
-
- video = data['video']['videos']['video1']
-
- title = video.get('nom') or data['title']
+ f'https://api.medici.tv/{subdomain}satie/edito/movie-file/{display_id}/', display_id,
+ headers=filter_dict({
+ 'Authorization': try_call(
+ lambda: urllib.parse.unquote(self._get_cookies(url)['auth._token.mAuth'].value)),
+ 'Device-Type': 'web',
+ 'Origin': origin,
+ 'Referer': f'{origin}/',
+ 'Accept': 'application/json, text/plain, */*',
+ }))
- video_id = video.get('id') or video_id
- formats = self._extract_f4m_formats(
- update_url_query(video['url_akamai'], {
- 'hdcore': '3.1.0',
- 'plugin=aasp': '3.1.0.43.124',
- }), video_id, f4m_id='hds')
+ if not traverse_obj(data, ('video', 'is_full_video')) and traverse_obj(
+ data, ('video', 'is_limited_by_user_access')):
+ self.report_warning(
+ 'The full video is for subscribers only. Only previews will be downloaded. If you '
+ 'have used the --cookies-from-browser option, try using the --cookies option instead')
- description = data.get('meta_description')
- thumbnail = video.get('url_thumbnail') or data.get('main_image')
- upload_date = unified_strdate(data['video'].get('date'))
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ data['video']['video_url'], display_id, 'mp4')
return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
+ 'id': str(data['id']),
+ 'display_id': display_id,
'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('subtitle', {str}),
+ 'thumbnail': ('picture', {url_or_none}),
+ 'timestamp': ('date_publish', {parse_iso8601}),
+ }),
}
diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py
index 9b50996..f6a0b41 100644
--- a/yt_dlp/extractor/microsoftstream.py
+++ b/yt_dlp/extractor/microsoftstream.py
@@ -1,10 +1,10 @@
-from base64 import b64decode
+import base64
from .common import InfoExtractor
from ..utils import (
merge_dicts,
- parse_iso8601,
parse_duration,
+ parse_iso8601,
parse_resolution,
try_get,
url_basename,
@@ -81,7 +81,7 @@ class MicrosoftStreamIE(InfoExtractor):
'url': thumbnail_url,
}
thumb_name = url_basename(thumbnail_url)
- thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
+ thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
thumb.update(parse_resolution(thumb_name))
thumbnails.append(thumb)
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index f64d575..caf60c8 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -4,11 +4,11 @@ import uuid
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
determine_ext,
dict_get,
- ExtractorError,
float_or_none,
- OnDemandPagedList,
traverse_obj,
)
diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py
index 38cc0c2..979584e 100644
--- a/yt_dlp/extractor/mit.py
+++ b/yt_dlp/extractor/mit.py
@@ -1,11 +1,11 @@
-import re
import json
+import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
get_element_by_id,
)
diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py
index 4be6947..58c4a23 100644
--- a/yt_dlp/extractor/mixch.py
+++ b/yt_dlp/extractor/mixch.py
@@ -1,5 +1,13 @@
from .common import InfoExtractor
-from ..utils import UserNotLive, traverse_obj
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ int_or_none,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
class MixchIE(InfoExtractor):
@@ -7,17 +15,20 @@ class MixchIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://mixch.tv/u/16236849/live',
+ 'url': 'https://mixch.tv/u/16943797/live',
'skip': 'don\'t know if this live persists',
'info_dict': {
- 'id': '16236849',
- 'title': '24配信シェア⭕️投票🙏💦',
- 'comment_count': 13145,
- 'view_count': 28348,
- 'timestamp': 1636189377,
- 'uploader': '🦥伊咲👶🏻#フレアワ',
- 'uploader_id': '16236849',
- }
+ 'id': '16943797',
+ 'ext': 'mp4',
+ 'title': '#EntView #カリナ #セブチ 2024-05-05 06:58',
+ 'comment_count': int,
+ 'view_count': int,
+ 'timestamp': 1714726805,
+ 'uploader': 'Ent.View K-news🎶💕',
+ 'uploader_id': '16943797',
+ 'live_status': 'is_live',
+ 'upload_date': '20240503',
+ },
}, {
'url': 'https://mixch.tv/u/16137876/live',
'only_matching': True,
@@ -25,31 +36,41 @@ class MixchIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(f'https://mixch.tv/u/{video_id}/live', video_id)
-
- initial_js_state = self._parse_json(self._search_regex(
- r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id)
- if not initial_js_state.get('liveInfo'):
+ data = self._download_json(f'https://mixch.tv/api-web/users/{video_id}/live', video_id)
+ if not traverse_obj(data, ('liveInfo', {dict})):
raise UserNotLive(video_id=video_id)
return {
'id': video_id,
- 'title': traverse_obj(initial_js_state, ('liveInfo', 'title')),
- 'comment_count': traverse_obj(initial_js_state, ('liveInfo', 'comments')),
- 'view_count': traverse_obj(initial_js_state, ('liveInfo', 'visitor')),
- 'timestamp': traverse_obj(initial_js_state, ('liveInfo', 'created')),
- 'uploader': traverse_obj(initial_js_state, ('broadcasterInfo', 'name')),
'uploader_id': video_id,
+ **traverse_obj(data, {
+ 'title': ('liveInfo', 'title', {str}),
+ 'comment_count': ('liveInfo', 'comments', {int_or_none}),
+ 'view_count': ('liveInfo', 'visitor', {int_or_none}),
+ 'timestamp': ('liveInfo', 'created', {int_or_none}),
+ 'uploader': ('broadcasterInfo', 'name', {str}),
+ }),
'formats': [{
'format_id': 'hls',
- 'url': (traverse_obj(initial_js_state, ('liveInfo', 'hls'))
- or f'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_{video_id}.m3u8'),
+ 'url': data['liveInfo']['hls'],
'ext': 'mp4',
'protocol': 'm3u8',
}],
'is_live': True,
+ '__post_extractor': self.extract_comments(video_id),
}
+ def _get_comments(self, video_id):
+ yield from traverse_obj(self._download_json(
+ f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id,
+ note='Downloading comments', errnote='Failed to download comments'), (..., {
+ 'author': ('name', {str}),
+ 'author_id': ('user_id', {str_or_none}),
+ 'id': ('message_id', {str}, {lambda x: x or None}),
+ 'text': ('body', {str}),
+ 'timestamp': ('created', {int}),
+ }))
+
class MixchArchiveIE(InfoExtractor):
IE_NAME = 'mixch:archive'
@@ -60,22 +81,38 @@ class MixchArchiveIE(InfoExtractor):
'skip': 'paid video, no DRM. expires at Jan 23',
'info_dict': {
'id': '421',
+ 'ext': 'mp4',
'title': '96NEKO SHOW TIME',
}
+ }, {
+ 'url': 'https://mixch.tv/archive/1213',
+ 'skip': 'paid video, no DRM. expires at Dec 31, 2023',
+ 'info_dict': {
+ 'id': '1213',
+ 'ext': 'mp4',
+ 'title': '【特別トーク番組アーカイブス】Merm4id×燐舞曲 2nd LIVE「VERSUS」',
+ 'release_date': '20231201',
+ 'thumbnail': str,
+ }
+ }, {
+ 'url': 'https://mixch.tv/archive/1214',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- html5_videos = self._parse_html5_media_entries(
- url, webpage.replace('video-js', 'video'), video_id, 'hls')
- if not html5_videos:
- self.raise_login_required(method='cookies')
- infodict = html5_videos[0]
- infodict.update({
- 'id': video_id,
- 'title': self._html_search_regex(r'class="archive-title">(.+?)</', webpage, 'title')
- })
+ try:
+ info_json = self._download_json(
+ f'https://mixch.tv/api-web/archive/{video_id}', video_id)['archive']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+ self.raise_login_required()
+ raise
- return infodict
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(info_json, ('title', {str})),
+ 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
+ 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
+ }
diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py
index a69a12e..411d41c 100644
--- a/yt_dlp/extractor/monstercat.py
+++ b/yt_dlp/extractor/monstercat.py
@@ -8,10 +8,10 @@ from ..utils import (
get_element_html_by_class,
get_element_text_and_html_by_tag,
int_or_none,
- unified_strdate,
strip_or_none,
traverse_obj,
try_call,
+ unified_strdate,
)
diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py
index 160150a..b6c18fe 100644
--- a/yt_dlp/extractor/motherless.py
+++ b/yt_dlp/extractor/motherless.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
import re
import urllib.parse
@@ -151,7 +151,7 @@ class MotherlessIE(InfoExtractor):
'd': 'days',
}
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
- upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
+ upload_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(**kwargs)).strftime('%Y%m%d')
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
uploader_id = self._html_search_regex(
diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py
index 668c098..ed5be4f 100644
--- a/yt_dlp/extractor/moviepilot.py
+++ b/yt_dlp/extractor/moviepilot.py
@@ -1,5 +1,5 @@
-from .dailymotion import DailymotionIE
from .common import InfoExtractor
+from .dailymotion import DailymotionIE
class MoviepilotIE(InfoExtractor):
@@ -14,7 +14,7 @@ class MoviepilotIE(InfoExtractor):
'display_id': 'interstellar-2',
'ext': 'mp4',
'title': 'Interstellar',
- 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1ZganMw4HVXg/x1080',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1.*/x1080',
'timestamp': 1605010596,
'description': 'md5:0ae9cb452af52610c9ffc60f2fd0474c',
'uploader': 'Moviepilot',
@@ -71,7 +71,7 @@ class MoviepilotIE(InfoExtractor):
'age_limit': 0,
'duration': 82,
'upload_date': '20201109',
- 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Zg3lxLv9j5u/x1080',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Z.*/x1080',
'uploader': 'Moviepilot',
'like_count': int,
'view_count': int,
@@ -92,6 +92,6 @@ class MoviepilotIE(InfoExtractor):
'ie_key': DailymotionIE.ie_key(),
'display_id': video_id,
'title': clip.get('title'),
- 'url': f'https://www.dailymotion.com/video/{clip["videoRemoteId"]}',
+ 'url': f'https://www.dailymotion.com/video/{clip["video"]["remoteId"]}',
'description': clip.get('summary'),
}
diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py
index cdd8ba4..6e0ea26 100644
--- a/yt_dlp/extractor/movingimage.py
+++ b/yt_dlp/extractor/movingimage.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- unescapeHTML,
parse_duration,
+ unescapeHTML,
)
diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py
index 77d1806..79728e1 100644
--- a/yt_dlp/extractor/msn.py
+++ b/yt_dlp/extractor/msn.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py
index edc4144..8a8a5fe 100644
--- a/yt_dlp/extractor/n1.py
+++ b/yt_dlp/extractor/n1.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- unified_timestamp,
extract_attributes,
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py
index 806b790..26400e3 100644
--- a/yt_dlp/extractor/naver.py
+++ b/yt_dlp/extractor/naver.py
@@ -5,7 +5,7 @@ import itertools
import json
import re
import time
-from urllib.parse import parse_qs, urlparse
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
@@ -388,7 +388,7 @@ class NaverNowIE(NaverBaseIE):
def _real_extract(self, url):
show_id = self._match_id(url)
- qs = parse_qs(urlparse(url).query)
+ qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
if not self._yes_playlist(show_id, qs.get('shareHightlight')):
return self._extract_highlight(show_id, qs['shareHightlight'][0])
diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py
index 81d11e3..ec4d636 100644
--- a/yt_dlp/extractor/nba.py
+++ b/yt_dlp/extractor/nba.py
@@ -7,9 +7,9 @@ from ..compat import (
compat_urllib_parse_unquote,
)
from ..utils import (
+ OnDemandPagedList,
int_or_none,
merge_dicts,
- OnDemandPagedList,
parse_duration,
parse_iso8601,
parse_qs,
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index 267fa83..e88f98a 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -3,9 +3,9 @@ import json
import re
import xml.etree.ElementTree
+from .adobepass import AdobePassIE
from .common import InfoExtractor
from .theplatform import ThePlatformIE, default_ns
-from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest
from ..utils import (
diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
index 41ea362..243221d 100644
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
merge_dicts,
parse_iso8601,
diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py
index d332b84..b54c12e 100644
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -1,9 +1,9 @@
+import hashlib
import itertools
import json
+import random
import re
import time
-from hashlib import md5
-from random import randint
from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding
@@ -34,7 +34,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')
- msg_digest = md5(message).hexdigest()
+ msg_digest = hashlib.md5(message).hexdigest()
data = pkcs7_padding(list(str.encode(
f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}')))
@@ -53,7 +53,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
'__csrf': '',
'os': 'pc',
'channel': 'undefined',
- 'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}',
+ 'requestId': f'{int(time.time() * 1000)}_{random.randint(0, 1000):04}',
**traverse_obj(self._get_cookies(self._API_BASE), {
'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
})
@@ -561,7 +561,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'timestamp': ('createTime', {self.kilo_or_none}),
})
- if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
+ if not self._yes_playlist(
+ info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
formats = self.extract_formats(info['mainSong'])
return {
diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py
index 6f78728..968c972 100644
--- a/yt_dlp/extractor/nfb.py
+++ b/yt_dlp/extractor/nfb.py
@@ -5,7 +5,6 @@ from ..utils import (
merge_dicts,
parse_count,
url_or_none,
- urljoin,
)
from ..utils.traversal import traverse_obj
@@ -16,8 +15,7 @@ class NFBBaseIE(InfoExtractor):
def _extract_ep_data(self, webpage, video_id, fatal=False):
return self._search_json(
- r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
- contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
+ r'episodesData\s*:', webpage, 'episode data', video_id, fatal=fatal) or {}
def _extract_ep_info(self, data, video_id, slug=None):
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
@@ -224,18 +222,14 @@ class NFBIE(NFBBaseIE):
# type_ can change from film to serie(s) after redirect; new slug may have episode number
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
- embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
- r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
- video_id = self._match_id(embed_url) # embed url has unique slug
- player = self._download_webpage(embed_url, video_id, 'Downloading player page')
- if 'MESSAGE_GEOBLOCKED' in player:
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ player_data = self._search_json(
+ r'window\.PLAYER_OPTIONS\[[^\]]+\]\s*=', webpage, 'player data', slug)
+ video_id = self._match_id(player_data['overlay']['url']) # overlay url always has unique slug
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
- video_id, 'mp4', m3u8_id='hls')
+ player_data['source'], video_id, 'mp4', m3u8_id='hls')
- if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
+ if dv_source := url_or_none(player_data.get('dvSource')):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
for fmt in fmts:
@@ -246,17 +240,16 @@ class NFBIE(NFBBaseIE):
info = {
'id': video_id,
'title': self._html_search_regex(
- r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
+ r'["\']nfb_version_title["\']\s*:\s*["\']([^"\']+)',
webpage, 'title', default=None),
'description': self._html_search_regex(
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
webpage, 'description', default=None),
- 'thumbnail': self._html_search_regex(
- r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
+ 'thumbnail': url_or_none(player_data.get('poster')),
'uploader': self._html_search_regex(
- r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
+ r'<[^>]+\bitemprop=["\']director["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
'release_year': int_or_none(self._html_search_regex(
- r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
+ r'["\']nfb_version_year["\']\s*:\s*["\']([^"\']+)',
webpage, 'release_year', default=None)),
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py
index febad8f..be732a3 100644
--- a/yt_dlp/extractor/nfhsnetwork.py
+++ b/yt_dlp/extractor/nfhsnetwork.py
@@ -1,11 +1,5 @@
from .common import InfoExtractor
-
-
-from ..utils import (
- try_get,
- unified_strdate,
- unified_timestamp
-)
+from ..utils import try_get, unified_strdate, unified_timestamp
class NFHSNetworkIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index 7cf5b24..8bb017a 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -8,6 +8,7 @@ from ..utils import (
int_or_none,
join_nonempty,
parse_duration,
+ remove_end,
traverse_obj,
try_call,
unescapeHTML,
@@ -19,8 +20,7 @@ from ..utils import (
class NhkBaseIE(InfoExtractor):
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
- _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
- _TYPE_REGEX = r'/(?P<type>video|audio)/'
+ _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
return self._download_json(
@@ -83,7 +83,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
- is_video = m_type == 'video'
+ is_video = m_type != 'audio'
if is_video:
episode_id = episode_id[:4] + '-' + episode_id[4:]
@@ -138,9 +138,10 @@ class NhkBaseIE(InfoExtractor):
else:
if fetch_episode:
- audio_path = episode['audio']['audio']
+ # From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
+ audio_path = remove_end(episode['audio']['audio'], '.m4a')
info['formats'] = self._extract_m3u8_formats(
- 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+ f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
episode_id, 'm4a', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
for f in info['formats']:
@@ -155,9 +156,11 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE):
- # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
- _VALID_URL = [rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>video)/(?P<id>[0-9a-z]+)',
- rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[0-9a-z]+)']
+ _VALID_URL = [
+ rf'{NhkBaseIE._BASE_URL_REGEX}shows/(?:(?P<type>video)/)?(?P<id>\d{{4}}[\da-z]\d+)/?(?:$|[?#])',
+ rf'{NhkBaseIE._BASE_URL_REGEX}(?:ondemand|shows)/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[\da-z]+)',
+ rf'{NhkBaseIE._BASE_URL_REGEX}ondemand/(?P<type>video)/(?P<id>\d{{4}}[\da-z]\d+)', # deprecated
+ ]
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
@@ -167,17 +170,16 @@ class NhkVodIE(NhkBaseIE):
'ext': 'mp4',
'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
- 'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463',
+ 'thumbnail': r're:https://.+/.+\.jpg',
'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
'series': 'Japan Railway Journal',
- 'modified_timestamp': 1694243656,
+ 'modified_timestamp': 1707217907,
'timestamp': 1681428600,
'release_timestamp': 1693883728,
'duration': 1679,
'upload_date': '20230413',
- 'modified_date': '20230909',
+ 'modified_date': '20240206',
'release_date': '20230905',
-
},
}, {
# video clip
@@ -188,15 +190,15 @@ class NhkVodIE(NhkBaseIE):
'ext': 'mp4',
'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
- 'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed',
+ 'thumbnail': r're:https://.+/.+\.jpg',
'series': 'Dining with the Chef',
'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
'duration': 148,
'upload_date': '20190816',
'release_date': '20230902',
'release_timestamp': 1693619292,
- 'modified_timestamp': 1694168033,
- 'modified_date': '20230908',
+ 'modified_timestamp': 1707217907,
+ 'modified_date': '20240206',
'timestamp': 1565997540,
},
}, {
@@ -208,7 +210,7 @@ class NhkVodIE(NhkBaseIE):
'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
'series': 'Living in Japan',
'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
- 'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545',
+ 'thumbnail': r're:https://.+/.+\.jpg',
'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
},
}, {
@@ -245,7 +247,7 @@ class NhkVodIE(NhkBaseIE):
'title': 'おはよう日本(7時台) - 10月8日放送',
'series': 'おはよう日本(7時台)',
'episode': '10月8日放送',
- 'thumbnail': 'md5:d733b1c8e965ab68fb02b2d347d0e9b4',
+ 'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
},
'skip': 'expires 2023-10-15',
@@ -255,17 +257,100 @@ class NhkVodIE(NhkBaseIE):
'info_dict': {
'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
'ext': 'mp4',
- 'title': 'Barakan Discovers AMAMI OSHIMA: Isson\'s Treasure Island',
+ 'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
'description': 'md5:5db620c46a0698451cc59add8816b797',
- 'thumbnail': 'md5:67d9ff28009ba379bfa85ad1aaa0e2bd',
+ 'thumbnail': r're:https://.+/.+\.jpg',
'release_date': '20230905',
'timestamp': 1690103400,
'duration': 2939,
'release_timestamp': 1693898699,
- 'modified_timestamp': 1698057495,
- 'modified_date': '20231023',
'upload_date': '20230723',
+ 'modified_timestamp': 1707217907,
+ 'modified_date': '20240206',
+ 'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
+ 'series': 'Barakan Discovers',
+ },
+ }, {
+ # /ondemand/video/ url with alphabetical character in 5th position of id
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
+ 'info_dict': {
+ 'id': 'nw_c_en_9999-a07',
+ 'ext': 'mp4',
+ 'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
+ 'series': 'Mini-Dramas on SDGs',
+ 'modified_date': '20240206',
+ 'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
+ 'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
+ 'timestamp': 1621962360,
+ 'duration': 189,
+ 'release_date': '20230903',
+ 'modified_timestamp': 1707217907,
+ 'upload_date': '20210525',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'release_timestamp': 1693713487,
+ },
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
+ 'info_dict': {
+ 'id': 'nw_c_en_9999-d17',
+ 'ext': 'mp4',
+ 'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
+ 'description': 'Today’s focus: Snow',
+ 'release_timestamp': 1693792402,
+ 'release_date': '20230904',
+ 'upload_date': '20220128',
+ 'timestamp': 1643370960,
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'duration': 136,
+ 'series': '',
+ 'modified_date': '20240206',
+ 'modified_timestamp': 1707217907,
+ },
+ }, {
+ # new /shows/ url format
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
+ 'info_dict': {
+ 'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
+ 'ext': 'mp4',
+ 'title': 'Japanology Plus - 20th Anniversary Special Part 1',
+ 'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
+ 'episode': '20th Anniversary Special Part 1',
+ 'series': 'Japanology Plus',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'duration': 1680,
+ 'timestamp': 1711020600,
+ 'upload_date': '20240321',
+ 'release_timestamp': 1711022683,
+ 'release_date': '20240321',
+ 'modified_timestamp': 1711031012,
+ 'modified_date': '20240321',
+ },
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
+ 'info_dict': {
+ 'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
+ 'ext': 'mp4',
+ 'title': '100 Ideas to Save the World - Working Styles Evolve',
+ 'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
+ 'episode': 'Working Styles Evolve',
+ 'series': '100 Ideas to Save the World',
+ 'thumbnail': r're:https://.+/.+\.jpg',
+ 'duration': 899,
+ 'upload_date': '20230325',
+ 'timestamp': 1679755200,
+ 'release_date': '20230905',
+ 'release_timestamp': 1693880540,
+ 'modified_date': '20240206',
+ 'modified_timestamp': 1707217907,
},
+ }, {
+ # new /shows/audio/ url format
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20231001-1/',
+ 'only_matching': True,
+ }, {
+ # valid url even if can't be found in wild; support needed for clip entries extraction
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/9999o80/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -273,18 +358,21 @@ class NhkVodIE(NhkBaseIE):
class NhkVodProgramIE(NhkBaseIE):
- _VALID_URL = rf'{NhkBaseIE._BASE_URL_REGEX}/program{NhkBaseIE._TYPE_REGEX}(?P<id>\w+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?'
+ _VALID_URL = rf'''(?x)
+ {NhkBaseIE._BASE_URL_REGEX}(?:shows|tv)/
+ (?:(?P<type>audio)/programs/)?(?P<id>\w+)/?
+ (?:\?(?:[^#]+&)?type=(?P<episode_type>clip|(?:radio|tv)Episode))?'''
_TESTS = [{
# video program episodes
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo',
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/sumo/',
'info_dict': {
'id': 'sumo',
'title': 'GRAND SUMO Highlights',
'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
},
- 'playlist_mincount': 0,
+ 'playlist_mincount': 1,
}, {
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/',
'info_dict': {
'id': 'japanrailway',
'title': 'Japan Railway Journal',
@@ -293,40 +381,68 @@ class NhkVodProgramIE(NhkBaseIE):
'playlist_mincount': 12,
}, {
# video program clips
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/?type=clip',
'info_dict': {
'id': 'japanrailway',
'title': 'Japan Railway Journal',
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
},
- 'playlist_mincount': 5,
- }, {
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
- 'only_matching': True,
+ 'playlist_mincount': 12,
}, {
# audio program
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/programs/livinginjapan/',
+ 'info_dict': {
+ 'id': 'livinginjapan',
+ 'title': 'Living in Japan',
+ 'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
+ },
+ 'playlist_mincount': 12,
+ }, {
+ # /tv/ program url
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
+ 'info_dict': {
+ 'id': 'designtalksplus',
+ 'title': 'DESIGN TALKS plus',
+ 'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if NhkVodIE.suitable(url) else super().suitable(url)
+
+ def _extract_meta_from_class_elements(self, class_values, html):
+ for class_value in class_values:
+ if value := clean_html(get_element_by_class(class_value, html)):
+ return value
+
def _real_extract(self, url):
lang, m_type, program_id, episode_type = self._match_valid_url(url).group('lang', 'type', 'id', 'episode_type')
episodes = self._call_api(
- program_id, lang, m_type == 'video', False, episode_type == 'clip')
+ program_id, lang, m_type != 'audio', False, episode_type == 'clip')
- entries = []
- for episode in episodes:
- episode_path = episode.get('url')
- if not episode_path:
- continue
- entries.append(self._extract_episode_info(
- urljoin(url, episode_path), episode))
+ def entries():
+ for episode in episodes:
+ if episode_path := episode.get('url'):
+ yield self._extract_episode_info(urljoin(url, episode_path), episode)
html = self._download_webpage(url, program_id)
- program_title = clean_html(get_element_by_class('p-programDetail__title', html))
- program_description = clean_html(get_element_by_class('p-programDetail__text', html))
-
- return self.playlist_result(entries, program_id, program_title, program_description)
+ program_title = self._extract_meta_from_class_elements([
+ 'p-programDetail__title', # /ondemand/program/
+ 'pProgramHero__logoText', # /shows/
+ 'tAudioProgramMain__title', # /shows/audio/programs/
+ 'p-program-name'], html) # /tv/
+ program_description = self._extract_meta_from_class_elements([
+ 'p-programDetail__text', # /ondemand/program/
+ 'pProgramHero__description', # /shows/
+ 'tAudioProgramMain__info', # /shows/audio/programs/
+ 'p-program-description'], html) # /tv/
+
+ return self.playlist_result(entries(), program_id, program_title, program_description)
class NhkForSchoolBangumiIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py
index 2521c40..64cddb4 100644
--- a/yt_dlp/extractor/nhl.py
+++ b/yt_dlp/extractor/nhl.py
@@ -3,8 +3,8 @@ from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
- parse_iso8601,
parse_duration,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 6a46246..b04ce96 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -1,11 +1,10 @@
-import datetime
+import datetime as dt
import functools
import itertools
import json
import re
import time
-
-from urllib.parse import urlparse
+import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from ..networking import Request
@@ -820,12 +819,12 @@ class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
'playlist_mincount': 1610,
}]
- _START_DATE = datetime.date(2007, 1, 1)
+ _START_DATE = dt.date(2007, 1, 1)
_RESULTS_PER_PAGE = 32
_MAX_PAGES = 50
def _entries(self, url, item_id, start_date=None, end_date=None):
- start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
+ start_date, end_date = start_date or self._START_DATE, end_date or dt.datetime.now().date()
# If the last page has a full page of videos, we need to break down the query interval further
last_page_len = len(list(self._get_entries_for_date(
@@ -957,7 +956,7 @@ class NiconicoLiveIE(InfoExtractor):
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
})
- hostname = remove_start(urlparse(urlh.url).hostname, 'sp.')
+ hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
if latency not in self._KNOWN_LATENCY:
latency = 'high'
diff --git a/yt_dlp/extractor/ninenews.py b/yt_dlp/extractor/ninenews.py
index 900d9ba..0b4f47b 100644
--- a/yt_dlp/extractor/ninenews.py
+++ b/yt_dlp/extractor/ninenews.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils.traversal import traverse_obj
diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py
index c655b75..b7170b0 100644
--- a/yt_dlp/extractor/ninenow.py
+++ b/yt_dlp/extractor/ninenow.py
@@ -2,8 +2,8 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
+ int_or_none,
smuggle_url,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py
index 35d1311..249e7cd 100644
--- a/yt_dlp/extractor/nitter.py
+++ b/yt_dlp/extractor/nitter.py
@@ -1,13 +1,14 @@
+import random
+import re
+
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ determine_ext,
parse_count,
- unified_timestamp,
remove_end,
- determine_ext,
+ unified_timestamp,
)
-import re
-import random
class NitterIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py
index cddc72f..513529b 100644
--- a/yt_dlp/extractor/nobelprize.py
+++ b/yt_dlp/extractor/nobelprize.py
@@ -1,11 +1,11 @@
from .common import InfoExtractor
from ..utils import (
- js_to_json,
- mimetype2ext,
determine_ext,
- update_url_query,
get_element_by_attribute,
int_or_none,
+ js_to_json,
+ mimetype2ext,
+ update_url_query,
)
diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py
index c7b8038..19cb972 100644
--- a/yt_dlp/extractor/noz.py
+++ b/yt_dlp/extractor/noz.py
@@ -1,11 +1,11 @@
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
- int_or_none,
find_xpath_attr,
- xpath_text,
+ int_or_none,
update_url_query,
+ xpath_text,
)
-from ..compat import compat_urllib_parse_unquote
class NozIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nts.py b/yt_dlp/extractor/nts.py
new file mode 100644
index 0000000..a801740
--- /dev/null
+++ b/yt_dlp/extractor/nts.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class NTSLiveIE(InfoExtractor):
+ IE_NAME = 'nts.live'
+ _VALID_URL = r'https?://(?:www\.)?nts\.live/shows/[^/?#]+/episodes/(?P<id>[^/?#]+)'
+ _TESTS = [
+ {
+ # embedded soundcloud
+ 'url': 'https://www.nts.live/shows/yu-su/episodes/yu-su-2nd-april-2024',
+ 'md5': 'b5444c04888c869d68758982de1a27d8',
+ 'info_dict': {
+ 'id': '1791563518',
+ 'ext': 'opus',
+ 'uploader_id': '995579326',
+ 'title': 'Pender Street Steppers & YU SU',
+ 'timestamp': 1712073600,
+ 'upload_date': '20240402',
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-qKcNO0z0AQGGbv9s-GljJCw-original.jpg',
+ 'license': 'all-rights-reserved',
+ 'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/user-643553014',
+ 'uploader': 'NTS Latest',
+ 'description': 'md5:cd00ac535a63caaad722483ae3ff802a',
+ 'duration': 10784.157,
+ 'genres': ['Deep House', 'House', 'Leftfield Disco', 'Jazz Fusion', 'Dream Pop'],
+ 'modified_timestamp': 1712564687,
+ 'modified_date': '20240408',
+ },
+ },
+ {
+ # embedded mixcloud
+ 'url': 'https://www.nts.live/shows/absolute-fiction/episodes/absolute-fiction-23rd-july-2022',
+ 'info_dict': {
+ 'id': 'NTSRadio_absolute-fiction-23rd-july-2022',
+ 'ext': 'webm',
+ 'like_count': int,
+ 'title': 'Absolute Fiction',
+ 'comment_count': int,
+ 'uploader_url': 'https://www.mixcloud.com/NTSRadio/',
+ 'description': 'md5:ba49da971ae8d71ee45813c52c5e2a04',
+ 'tags': [],
+ 'duration': 3529,
+ 'timestamp': 1658588400,
+ 'repost_count': int,
+ 'upload_date': '20220723',
+ 'uploader_id': 'NTSRadio',
+ 'thumbnail': 'https://thumbnailer.mixcloud.com/unsafe/1024x1024/extaudio/5/1/a/d/ae3e-1be9-4fd4-983e-9c3294226eac',
+ 'uploader': 'Mixcloud NTS Radio',
+ 'genres': ['Minimal Synth', 'Post Punk', 'Industrial '],
+ 'modified_timestamp': 1658842165,
+ 'modified_date': '20220726',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._search_json(r'window\._REACT_STATE_\s*=', webpage, 'react state', video_id)
+
+ return {
+ '_type': 'url_transparent',
+ **traverse_obj(data, ('episode', {
+ 'url': ('audio_sources', ..., 'url', {url_or_none}, any),
+ 'title': ('name', {str}),
+ 'description': ('description', {str}),
+ 'genres': ('genres', ..., 'value', {str}),
+ 'timestamp': ('broadcast', {parse_iso8601}),
+ 'modified_timestamp': ('updated', {parse_iso8601}),
+ })),
+ }
diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py
index ec54041..5670445 100644
--- a/yt_dlp/extractor/nuevo.py
+++ b/yt_dlp/extractor/nuevo.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-
-from ..utils import (
- float_or_none,
- xpath_text
-)
+from ..utils import float_or_none, xpath_text
class NuevoBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py
index 6ac351c..0ef0ec7 100644
--- a/yt_dlp/extractor/nuvid.py
+++ b/yt_dlp/extractor/nuvid.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- parse_duration,
int_or_none,
+ parse_duration,
strip_or_none,
traverse_obj,
url_or_none,
diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py
index 062f9a8..0a12aea 100644
--- a/yt_dlp/extractor/nzherald.py
+++ b/yt_dlp/extractor/nzherald.py
@@ -3,10 +3,7 @@ import json
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- traverse_obj
-)
+from ..utils import ExtractorError, traverse_obj
class NZHeraldIE(InfoExtractor):
diff --git a/yt_dlp/extractor/odkmedia.py b/yt_dlp/extractor/odkmedia.py
index b852160..8321b07 100644
--- a/yt_dlp/extractor/odkmedia.py
+++ b/yt_dlp/extractor/odkmedia.py
@@ -7,7 +7,7 @@ from ..utils import (
GeoRestrictedError,
float_or_none,
traverse_obj,
- try_call
+ try_call,
)
diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py
index 61d1f40..5507d2f 100644
--- a/yt_dlp/extractor/olympics.py
+++ b/yt_dlp/extractor/olympics.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- try_get
-)
+from ..utils import int_or_none, try_get
class OlympicsReplayIE(InfoExtractor):
diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py
index a46211e..351b397 100644
--- a/yt_dlp/extractor/onenewsnz.py
+++ b/yt_dlp/extractor/onenewsnz.py
@@ -1,10 +1,6 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
-
-from ..utils import (
- ExtractorError,
- traverse_obj
-)
+from ..utils import ExtractorError, traverse_obj
class OneNewsNZIE(InfoExtractor):
diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py
index 0d59e8c..da10f37 100644
--- a/yt_dlp/extractor/onet.py
+++ b/yt_dlp/extractor/onet.py
@@ -2,13 +2,13 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
+ NO_DEFAULT,
ExtractorError,
+ determine_ext,
float_or_none,
get_element_by_class,
int_or_none,
js_to_json,
- NO_DEFAULT,
parse_iso8601,
remove_start,
strip_or_none,
diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py
index 1fafd9a..12bf557 100644
--- a/yt_dlp/extractor/opencast.py
+++ b/yt_dlp/extractor/opencast.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_iso8601,
traverse_obj,
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index 82a81c6..c9a96ae 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -1,4 +1,5 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
get_first,
@@ -8,7 +9,6 @@ from ..utils import (
unified_strdate,
unified_timestamp,
)
-from ..compat import compat_str
class OpenRecBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py
index d49909d..0e7a848 100644
--- a/yt_dlp/extractor/ora.py
+++ b/yt_dlp/extractor/ora.py
@@ -1,4 +1,5 @@
import re
+
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py
index 526e9ac..3c837be 100644
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -3,203 +3,24 @@ import functools
import re
from .common import InfoExtractor
-from ..networking import HEADRequest
from ..utils import (
- InAdvancePagedList,
clean_html,
determine_ext,
float_or_none,
int_or_none,
- join_nonempty,
make_archive_id,
mimetype2ext,
orderedSet,
+ parse_age_limit,
remove_end,
- smuggle_url,
strip_jsonp,
try_call,
- unescapeHTML,
unified_strdate,
- unsmuggle_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
-class ORFTVthekIE(InfoExtractor):
- IE_NAME = 'orf:tvthek'
- IE_DESC = 'ORF TVthek'
- _VALID_URL = r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])'
-
- _TESTS = [{
- 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079',
- 'info_dict': {
- 'id': '14121079',
- },
- 'playlist_count': 11,
- 'params': {'noplaylist': True}
- }, {
- 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
- 'info_dict': {
- 'id': '14121079',
- },
- 'playlist_count': 1,
- 'params': {'playlist_items': '5'}
- }, {
- 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
- 'info_dict': {
- 'id': '14121079',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '15083150',
- 'ext': 'mp4',
- 'description': 'md5:7be1c485425f5f255a5e4e4815e77d04',
- 'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg',
- 'title': 'Umfrage: Welches Tier ist Sebastian Kurz?',
- }
- }],
- 'playlist_count': 1,
- 'params': {'noplaylist': True, 'skip_download': 'm3u8'}
- }, {
- 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
- 'playlist': [{
- 'md5': '2942210346ed779588f428a92db88712',
- 'info_dict': {
- 'id': '8896777',
- 'ext': 'mp4',
- 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
- 'description': 'md5:c1272f0245537812d4e36419c207b67d',
- 'duration': 2668,
- 'upload_date': '20141208',
- },
- }],
- 'skip': 'Blocked outside of Austria / Germany',
- }, {
- 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
- 'info_dict': {
- 'id': '7982259',
- 'ext': 'mp4',
- 'title': 'Best of Ingrid Thurnher',
- 'upload_date': '20140527',
- 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
- },
- 'params': {
- 'skip_download': True, # rtsp downloads
- },
- 'skip': 'Blocked outside of Austria / Germany',
- }, {
- 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
- 'only_matching': True,
- }, {
- 'url': 'http://tvthek.orf.at/profile/Universum/35429',
- 'only_matching': True,
- }]
-
- def _pagefunc(self, url, data_jsb, n, *, image=None):
- sd = data_jsb[n]
- video_id, title = str(sd['id']), sd['title']
- formats = []
- for fd in sd['sources']:
- src = url_or_none(fd.get('src'))
- if not src:
- continue
- format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
- ext = determine_ext(src)
- if ext == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest')
- if any('/geoprotection' in f['url'] for f in m3u8_formats):
- self.raise_geo_restricted()
- formats.extend(m3u8_formats)
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- src, video_id, f4m_id=format_id, fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest'))
- else:
- formats.append({
- 'format_id': format_id,
- 'url': src,
- 'protocol': fd.get('protocol'),
- })
-
- # Check for geoblocking.
- # There is a property is_geoprotection, but that's always false
- geo_str = sd.get('geoprotection_string')
- http_url = next(
- (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])),
- None) if geo_str else None
- if http_url:
- self._request_webpage(
- HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
- errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')
-
- subtitles = {}
- for sub in sd.get('subtitles', []):
- sub_src = sub.get('src')
- if not sub_src:
- continue
- subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
- 'url': sub_src,
- })
-
- upload_date = unified_strdate(sd.get('created_date'))
-
- thumbnails = []
- preview = sd.get('preview_image_url')
- if preview:
- thumbnails.append({
- 'id': 'preview',
- 'url': preview,
- 'preference': 0,
- })
- image = sd.get('image_full_url') or image
- if image:
- thumbnails.append({
- 'id': 'full',
- 'url': image,
- 'preference': 1,
- })
-
- yield {
- 'id': video_id,
- 'title': title,
- 'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}),
- 'formats': formats,
- 'subtitles': subtitles,
- 'description': sd.get('description'),
- 'duration': int_or_none(sd.get('duration_in_seconds')),
- 'upload_date': upload_date,
- 'thumbnails': thumbnails,
- }
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url)
- playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url')
- webpage = self._download_webpage(url, playlist_id)
-
- data_jsb = self._parse_json(
- self._search_regex(
- r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
- webpage, 'playlist', group='json'),
- playlist_id, transform_source=unescapeHTML)['playlist']['videos']
-
- if not self._yes_playlist(playlist_id, video_id, smuggled_data):
- data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id]
-
- playlist_count = len(data_jsb)
- image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None
-
- page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image)
- return {
- '_type': 'playlist',
- 'entries': InAdvancePagedList(page_func, playlist_count, 1),
- 'id': playlist_id,
- }
-
-
class ORFRadioIE(InfoExtractor):
IE_NAME = 'orf:radio'
@@ -569,7 +390,7 @@ class ORFFM4StoryIE(InfoExtractor):
class ORFONIE(InfoExtractor):
IE_NAME = 'orf:on'
- _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
+ _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
'info_dict': {
@@ -582,33 +403,59 @@ class ORFONIE(InfoExtractor):
'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
+ '_old_archive_ids': ['orftvthek 14210000'],
+ }
+ }, {
+ 'url': 'https://on.orf.at/video/3220355',
+ 'md5': 'f94d98e667cf9a3851317efb4e136662',
+ 'info_dict': {
+ 'id': '3220355',
+ 'ext': 'mp4',
+ 'duration': 445.04,
+ 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png',
+ 'title': '50 Jahre Burgenland: Der Festumzug',
+ 'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0',
+ 'media_type': 'episode',
+ 'timestamp': 52916400,
+ 'upload_date': '19710905',
+ '_old_archive_ids': ['orftvthek 3220355'],
}
}]
- def _extract_video(self, video_id, display_id):
+ def _extract_video(self, video_id):
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json(
- f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
+ f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)
+
+ if traverse_obj(api_json, 'is_drm_protected'):
+ self.report_drm(video_id)
formats, subtitles = [], {}
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
if manifest_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
- manifest_url, display_id, fatal=False, m3u8_id='hls')
+ manifest_url, video_id, fatal=False, m3u8_id='hls')
elif manifest_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
- manifest_url, display_id, fatal=False, mpd_id='dash')
+ manifest_url, video_id, fatal=False, mpd_id='dash')
else:
continue
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
+ for sub_url in traverse_obj(api_json, (
+ '_embedded', 'subtitle',
+ ('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url'), {url_or_none})):
+ self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles)
+
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
+ '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)],
**traverse_obj(api_json, {
+ 'age_limit': ('age_classification', {parse_age_limit}),
'duration': ('duration_second', {float_or_none}),
'title': (('title', 'headline'), {str}),
'description': (('description', 'teaser_text'), {str}),
@@ -617,14 +464,14 @@ class ORFONIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id, display_id = self._match_valid_url(url).group('id', 'slug')
- webpage = self._download_webpage(url, display_id)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
return {
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(
['description', 'og:description', 'twitter:description'], webpage, default=None),
- **self._search_json_ld(webpage, display_id, fatal=False),
- **self._extract_video(video_id, display_id),
+ **self._search_json_ld(webpage, video_id, fatal=False),
+ **self._extract_video(video_id),
}
diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py
index 5620330..3e969c8 100644
--- a/yt_dlp/extractor/packtpub.py
+++ b/yt_dlp/extractor/packtpub.py
@@ -3,13 +3,12 @@ import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
# remove_end,
str_or_none,
strip_or_none,
unified_timestamp,
- # urljoin,
)
diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py
index ddea32d..6b25962 100644
--- a/yt_dlp/extractor/panopto.py
+++ b/yt_dlp/extractor/panopto.py
@@ -1,21 +1,17 @@
import calendar
-import json
+import datetime as dt
import functools
-from datetime import datetime, timezone
-from random import random
+import json
+import random
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlparse,
- compat_urlparse
-)
-
+from ..compat import compat_urllib_parse_urlparse, compat_urlparse
from ..utils import (
- bug_reports_message,
ExtractorError,
+ OnDemandPagedList,
+ bug_reports_message,
get_first,
int_or_none,
- OnDemandPagedList,
parse_qs,
srt_subtitles_timecode,
traverse_obj,
@@ -243,7 +239,7 @@ class PanoptoIE(PanoptoBaseIE):
invocation_id = delivery_info.get('InvocationId')
stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
if invocation_id and stream_id and duration:
- timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
+ timestamp_str = f'/Date({calendar.timegm(dt.datetime.now(dt.timezone.utc).timetuple())}000)/'
data = {
'streamRequests': [
{
@@ -415,7 +411,7 @@ class PanoptoIE(PanoptoBaseIE):
'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), expected_type=lambda x: x or None),
'timestamp': session_start_time - 11640000000 if session_start_time else None,
'duration': delivery.get('Duration'),
- 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
+ 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random.random()}',
'average_rating': delivery.get('AverageRating'),
'chapters': self._extract_chapters(timestamps),
'uploader': delivery.get('OwnerDisplayName') or None,
diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py
index 7e472a6..3f19803 100644
--- a/yt_dlp/extractor/paramountplus.py
+++ b/yt_dlp/extractor/paramountplus.py
@@ -1,7 +1,7 @@
import itertools
-from .common import InfoExtractor
from .cbs import CBSBaseIE
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index d2ddb72..6c441ff 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -1,8 +1,8 @@
import itertools
+import urllib.parse
from .common import InfoExtractor
from .vimeo import VimeoIE
-from ..compat import compat_urllib_parse_unquote
from ..networking.exceptions import HTTPError
from ..utils import (
KNOWN_EXTENSIONS,
@@ -14,7 +14,6 @@ from ..utils import (
parse_iso8601,
str_or_none,
traverse_obj,
- try_get,
url_or_none,
urljoin,
)
@@ -92,7 +91,7 @@ class PatreonIE(PatreonBaseIE):
'thumbnail': 're:^https?://.*$',
'upload_date': '20150211',
'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
- 'uploader_id': 'TraciJHines',
+ 'uploader_id': '@TraciHinesMusic',
'categories': ['Entertainment'],
'duration': 282,
'view_count': int,
@@ -106,8 +105,10 @@ class PatreonIE(PatreonBaseIE):
'availability': 'public',
'channel_follower_count': int,
'playable_in_embed': True,
- 'uploader_url': 'http://www.youtube.com/user/TraciJHines',
+ 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
'comment_count': int,
+ 'channel_is_verified': True,
+ 'chapters': 'count:4',
},
'params': {
'noplaylist': True,
@@ -176,7 +177,71 @@ class PatreonIE(PatreonBaseIE):
'uploader_url': 'https://www.patreon.com/thenormies',
},
'skip': 'Patron-only content',
+ }, {
+ # dead vimeo and embed URLs, need to extract post_file
+ 'url': 'https://www.patreon.com/posts/hunter-x-hunter-34007913',
+ 'info_dict': {
+ 'id': '34007913',
+ 'ext': 'mp4',
+ 'title': 'Hunter x Hunter | Kurapika DESTROYS Uvogin!!!',
+ 'like_count': int,
+ 'uploader': 'YaBoyRoshi',
+ 'timestamp': 1581636833,
+ 'channel_url': 'https://www.patreon.com/yaboyroshi',
+ 'thumbnail': r're:^https?://.*$',
+ 'tags': ['Hunter x Hunter'],
+ 'uploader_id': '14264111',
+ 'comment_count': int,
+ 'channel_follower_count': int,
+ 'description': 'Kurapika is a walking cheat code!',
+ 'upload_date': '20200213',
+ 'channel_id': '2147162',
+ 'uploader_url': 'https://www.patreon.com/yaboyroshi',
+ },
+ }, {
+ # NSFW vimeo embed URL
+ 'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
+ 'info_dict': {
+ 'id': '902250943',
+ 'ext': 'mp4',
+ 'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
+ 'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
+ 'uploader': 'Npickyeonhwa',
+ 'uploader_id': '90574422',
+ 'uploader_url': 'https://www.patreon.com/Yeonhwa726',
+ 'channel_id': '10237902',
+ 'channel_url': 'https://www.patreon.com/Yeonhwa726',
+ 'duration': 70,
+ 'timestamp': 1705150153,
+ 'upload_date': '20240113',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.+',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # multiple attachments/embeds
+ 'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': '100601977',
+ 'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
+ 'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
+ 'uploader': 'Bradley Hall',
+ 'uploader_id': '24401883',
+ 'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
+ 'channel_id': '3193932',
+ 'channel_url': 'https://www.patreon.com/bradleyhallguitar',
+ 'channel_follower_count': int,
+ 'timestamp': 1710777855,
+ 'upload_date': '20240318',
+ 'like_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^https?://.+',
+ },
+ 'skip': 'Patron-only content',
}]
+ _RETURN_TYPE = 'video'
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -191,102 +256,108 @@ class PatreonIE(PatreonBaseIE):
'include': 'audio,user,user_defined_tags,campaign,attachments_media',
})
attributes = post['data']['attributes']
- title = attributes['title'].strip()
- image = attributes.get('image') or {}
- info = {
- 'id': video_id,
- 'title': title,
- 'description': clean_html(attributes.get('content')),
- 'thumbnail': image.get('large_url') or image.get('url'),
- 'timestamp': parse_iso8601(attributes.get('published_at')),
- 'like_count': int_or_none(attributes.get('like_count')),
- 'comment_count': int_or_none(attributes.get('comment_count')),
- }
- can_view_post = traverse_obj(attributes, 'current_user_can_view')
- if can_view_post and info['comment_count']:
- info['__post_extractor'] = self.extract_comments(video_id)
-
- for i in post.get('included', []):
- i_type = i.get('type')
- if i_type == 'media':
- media_attributes = i.get('attributes') or {}
- download_url = media_attributes.get('download_url')
+ info = traverse_obj(attributes, {
+ 'title': ('title', {str.strip}),
+ 'description': ('content', {clean_html}),
+ 'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
+ 'timestamp': ('published_at', {parse_iso8601}),
+ 'like_count': ('like_count', {int_or_none}),
+ 'comment_count': ('comment_count', {int_or_none}),
+ })
+
+ entries = []
+ idx = 0
+ for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
+ include_type = include['type']
+ if include_type == 'media':
+ media_attributes = traverse_obj(include, ('attributes', {dict})) or {}
+ download_url = url_or_none(media_attributes.get('download_url'))
ext = mimetype2ext(media_attributes.get('mimetype'))
# if size_bytes is None, this media file is likely unavailable
# See: https://github.com/yt-dlp/yt-dlp/issues/4608
size_bytes = int_or_none(media_attributes.get('size_bytes'))
if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
- # XXX: what happens if there are multiple attachments?
- return {
- **info,
+ idx += 1
+ entries.append({
+ 'id': f'{video_id}-{idx}',
'ext': ext,
'filesize': size_bytes,
'url': download_url,
- }
- elif i_type == 'user':
- user_attributes = i.get('attributes')
- if user_attributes:
- info.update({
- 'uploader': user_attributes.get('full_name'),
- 'uploader_id': str_or_none(i.get('id')),
- 'uploader_url': user_attributes.get('url'),
})
- elif i_type == 'post_tag':
- info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
-
- elif i_type == 'campaign':
- info.update({
- 'channel': traverse_obj(i, ('attributes', 'title')),
- 'channel_id': str_or_none(i.get('id')),
- 'channel_url': traverse_obj(i, ('attributes', 'url')),
- 'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
- })
+ elif include_type == 'user':
+ info.update(traverse_obj(include, {
+ 'uploader': ('attributes', 'full_name', {str}),
+ 'uploader_id': ('id', {str_or_none}),
+ 'uploader_url': ('attributes', 'url', {url_or_none}),
+ }))
+
+ elif include_type == 'post_tag':
+ if post_tag := traverse_obj(include, ('attributes', 'value', {str})):
+ info.setdefault('tags', []).append(post_tag)
+
+ elif include_type == 'campaign':
+ info.update(traverse_obj(include, {
+ 'channel': ('attributes', 'title', {str}),
+ 'channel_id': ('id', {str_or_none}),
+ 'channel_url': ('attributes', 'url', {url_or_none}),
+ 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
+ }))
# handle Vimeo embeds
- if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
- embed_html = try_get(attributes, lambda x: x['embed']['html'])
- v_url = url_or_none(compat_urllib_parse_unquote(
- self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
- if v_url:
- return {
- **info,
- '_type': 'url_transparent',
- 'url': VimeoIE._smuggle_referrer(v_url, 'https://patreon.com'),
- 'ie_key': 'Vimeo',
- }
-
- embed_url = try_get(attributes, lambda x: x['embed']['url'])
- if embed_url:
- return {
- **info,
- '_type': 'url',
- 'url': embed_url,
- }
-
- post_file = traverse_obj(attributes, 'post_file')
+ if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
+ v_url = urllib.parse.unquote(self._html_search_regex(
+ r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
+ traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
+ if url_or_none(v_url) and self._request_webpage(
+ v_url, video_id, 'Checking Vimeo embed URL',
+ headers={'Referer': 'https://patreon.com/'},
+ fatal=False, errnote=False):
+ entries.append(self.url_result(
+ VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+ VimeoIE, url_transparent=True))
+
+ embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
+ if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
+ entries.append(self.url_result(embed_url))
+
+ post_file = traverse_obj(attributes, ('post_file', {dict}))
if post_file:
name = post_file.get('name')
ext = determine_ext(name)
if ext in KNOWN_EXTENSIONS:
- return {
- **info,
+ entries.append({
+ 'id': video_id,
'ext': ext,
'url': post_file['url'],
- }
+ })
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
- return {
- **info,
+ entries.append({
+ 'id': video_id,
'formats': formats,
'subtitles': subtitles,
- }
+ })
- if can_view_post is False:
+ can_view_post = traverse_obj(attributes, 'current_user_can_view')
+ comments = None
+ if can_view_post and info.get('comment_count'):
+ comments = self.extract_comments(video_id)
+
+ if not entries and can_view_post is False:
self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
- else:
+ elif not entries:
self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
+ elif len(entries) == 1:
+ info.update(entries[0])
+ else:
+ for entry in entries:
+ entry.update(info)
+ return self.playlist_result(entries, video_id, **info, __post_extractor=comments)
+
+ info['id'] = video_id
+ info['__post_extractor'] = comments
return info
def _get_comments(self, post_id):
diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py
index 2bb2ea9..f6f5a5c 100644
--- a/yt_dlp/extractor/pbs.py
+++ b/yt_dlp/extractor/pbs.py
@@ -3,10 +3,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ US_RATINGS,
ExtractorError,
determine_ext,
- int_or_none,
float_or_none,
+ int_or_none,
js_to_json,
orderedSet,
strip_jsonp,
@@ -14,7 +15,6 @@ from ..utils import (
traverse_obj,
unified_strdate,
url_or_none,
- US_RATINGS,
)
diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py
index e27e5a7..086eaaf 100644
--- a/yt_dlp/extractor/pearvideo.py
+++ b/yt_dlp/extractor/pearvideo.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..utils import (
qualities,
- unified_timestamp,
traverse_obj,
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py
index 730b239..fb4d025 100644
--- a/yt_dlp/extractor/peertube.py
+++ b/yt_dlp/extractor/peertube.py
@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ OnDemandPagedList,
format_field,
int_or_none,
parse_resolution,
@@ -12,7 +13,6 @@ from ..utils import (
unified_timestamp,
url_or_none,
urljoin,
- OnDemandPagedList,
)
@@ -1470,11 +1470,15 @@ class PeerTubeIE(InfoExtractor):
title = video['name']
- formats = []
+ formats, is_live = [], False
files = video.get('files') or []
for playlist in (video.get('streamingPlaylists') or []):
if not isinstance(playlist, dict):
continue
+ if playlist_url := url_or_none(playlist.get('playlistUrl')):
+ is_live = True
+ formats.extend(self._extract_m3u8_formats(
+ playlist_url, video_id, fatal=False, live=True))
playlist_files = playlist.get('files')
if not (playlist_files and isinstance(playlist_files, list)):
continue
@@ -1498,6 +1502,7 @@ class PeerTubeIE(InfoExtractor):
f['vcodec'] = 'none'
else:
f['fps'] = int_or_none(file_.get('fps'))
+ is_live = False
formats.append(f)
description = video.get('description')
@@ -1555,6 +1560,7 @@ class PeerTubeIE(InfoExtractor):
'categories': categories,
'formats': formats,
'subtitles': subtitles,
+ 'is_live': is_live,
'webpage_url': webpage_url,
}
diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py
index 3ae985d..87d912d 100644
--- a/yt_dlp/extractor/piapro.py
+++ b/yt_dlp/extractor/piapro.py
@@ -2,6 +2,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
+ clean_html,
+ get_element_by_class,
parse_duration,
parse_filesize,
str_to_int,
@@ -88,34 +90,22 @@ class PiaproIE(InfoExtractor):
if category_id not in ('1', '2', '21', '22', '23', '24', '25'):
raise ExtractorError('The URL does not contain audio.', expected=True)
- str_duration, str_filesize = self._search_regex(
- r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B))', webpage, 'duration and size',
- group=(1, 2), default=(None, None))
- str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False)
-
- uploader_id, uploader = self._search_regex(
- r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 'uploader',
- group=(1, 2), default=(None, None))
- content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID')
- create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp')
-
- player_webpage = self._download_webpage(
- f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}',
- video_id, note='Downloading player webpage')
+ def extract_info(name, description):
+ return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*</p>', webpage, description, default=None)
return {
'id': video_id,
- 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False),
- 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'timestamp': unified_timestamp(create_date, False),
- 'duration': parse_duration(str_duration),
- 'view_count': str_to_int(str_viewcount),
+ 'title': clean_html(get_element_by_class('contents_title', webpage)),
+ 'description': clean_html(get_element_by_class('contents_description', webpage)),
+ 'uploader': clean_html(get_element_by_class('contents_creator_txt', webpage)),
+ 'uploader_id': self._search_regex(
+ r'<a\s+href="/([^"]+)"', get_element_by_class('contents_creator', webpage), 'uploader id', default=None),
+ 'timestamp': unified_timestamp(extract_info('投稿日', 'timestamp'), False),
+ 'duration': parse_duration(extract_info('長さ', 'duration')),
+ 'view_count': str_to_int(extract_info('閲覧数', 'view count')),
'thumbnail': self._html_search_meta('twitter:image', webpage),
-
- 'filesize_approx': parse_filesize(str_filesize.replace(',', '')),
- 'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 'url'),
+ 'filesize_approx': parse_filesize((extract_info('サイズ', 'size') or '').replace(',', '')),
+ 'url': self._search_regex(r'\"url\":\s*\"(.*?)\"', webpage, 'url'),
'ext': 'mp3',
'vcodec': 'none',
}
diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py
index 97a9bf5..02ae2fe 100644
--- a/yt_dlp/extractor/piksel.py
+++ b/yt_dlp/extractor/piksel.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
int_or_none,
join_nonempty,
parse_iso8601,
@@ -25,29 +25,31 @@ class PikselIE(InfoExtractor):
)|
(?:api|player)\.multicastmedia|
(?:api-ovp|player)\.piksel
- )\.com|
+ )\.(?:com|tech)|
(?:
mz-edge\.stream\.co|
movie-s\.nhk\.or
)\.jp|
vidego\.baltimorecity\.gov
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
- _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)']
+ _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.(?:com|tech)/v/[a-z0-9]+)']
_TESTS = [
{
- 'url': 'http://player.piksel.com/v/ums2867l',
+ 'url': 'http://player.piksel.tech/v/ums2867l',
'md5': '34e34c8d89dc2559976a6079db531e85',
'info_dict': {
'id': 'ums2867l',
'ext': 'mp4',
'title': 'GX-005 with Caption',
'timestamp': 1481335659,
- 'upload_date': '20161210'
+ 'upload_date': '20161210',
+ 'description': '',
+ 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480',
}
},
{
# Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
- 'url': 'https://player.piksel.com/v/v80kqp41',
+ 'url': 'https://player.piksel.tech/v/v80kqp41',
'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
'info_dict': {
'id': 'v80kqp41',
@@ -55,7 +57,8 @@ class PikselIE(InfoExtractor):
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
'timestamp': 1486171129,
- 'upload_date': '20170204'
+ 'upload_date': '20170204',
+ 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1495569155/3279887.jpg?w=640&h=360',
}
},
{
@@ -65,7 +68,7 @@ class PikselIE(InfoExtractor):
}
]
- def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True):
+ def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.tech', fatal=True):
url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5')
response = traverse_obj(
self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {}
@@ -146,7 +149,7 @@ class PikselIE(InfoExtractor):
smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
if smil_url:
- transform_source = None
+ transform_source = lambda x: x.replace('src="/', 'src="')
if ref_id == 'nhkworld':
# TODO: figure out if this is something to be fixed in urljoin,
# _parse_smil_formats or keep it here
diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py
index d67f600..c72a387 100644
--- a/yt_dlp/extractor/pladform.py
+++ b/yt_dlp/extractor/pladform.py
@@ -1,11 +1,11 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_qs,
- xpath_text,
qualities,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py
index 166b98c..d978c08 100644
--- a/yt_dlp/extractor/platzi.py
+++ b/yt_dlp/extractor/platzi.py
@@ -4,8 +4,8 @@ from ..compat import (
compat_str,
)
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py
index c418f88..a01b422 100644
--- a/yt_dlp/extractor/playtvak.py
+++ b/yt_dlp/extractor/playtvak.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_urllib_parse_urlencode,
+ compat_urlparse,
)
from ..utils import (
ExtractorError,
diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py
index 809b656..60c9eff 100644
--- a/yt_dlp/extractor/pluralsight.py
+++ b/yt_dlp/extractor/pluralsight.py
@@ -10,8 +10,8 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
float_or_none,
int_or_none,
parse_duration,
diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py
index 1524a1f..ecf2132 100644
--- a/yt_dlp/extractor/polsatgo.py
+++ b/yt_dlp/extractor/polsatgo.py
@@ -1,12 +1,12 @@
-from uuid import uuid4
import json
+import uuid
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
int_or_none,
try_get,
url_or_none,
- ExtractorError,
)
@@ -51,7 +51,7 @@ class PolsatGoIE(InfoExtractor):
}
def _call_api(self, endpoint, media_id, method, params):
- rand_uuid = str(uuid4())
+ rand_uuid = str(uuid.uuid4())
res = self._download_json(
f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id,
note=f'Downloading {method} JSON metadata',
diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py
deleted file mode 100644
index 7d16a16..0000000
--- a/yt_dlp/extractor/porn91.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import urllib.parse
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- parse_duration,
- remove_end,
- unified_strdate,
- ExtractorError,
-)
-
-
-class Porn91IE(InfoExtractor):
- IE_NAME = '91porn'
- _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
-
- _TESTS = [{
- 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
- 'md5': 'd869db281402e0ef4ddef3c38b866f86',
- 'info_dict': {
- 'id': '7e42283b4f5ab36da134',
- 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
- 'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
- 'ext': 'mp4',
- 'duration': 431,
- 'upload_date': '20150520',
- 'comment_count': int,
- 'view_count': int,
- 'age_limit': 18,
- }
- }, {
- 'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
- 'md5': 'f8fd50540468a6d795378cd778b40226',
- 'info_dict': {
- 'id': '7ef0cf3d362c699ab91c',
- 'title': '真实空乘,冲上云霄第二部',
- 'description': 'md5:618bf9652cafcc66cd277bd96789baea',
- 'ext': 'mp4',
- 'duration': 248,
- 'upload_date': '20221119',
- 'comment_count': int,
- 'view_count': int,
- 'age_limit': 18,
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- self._set_cookie('91porn.com', 'language', 'cn_CN')
-
- webpage = self._download_webpage(
- 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
-
- if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
- raise ExtractorError('91 Porn says: Video does not exist', expected=True)
-
- daily_limit = self._search_regex(
- r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
- if daily_limit:
- raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
-
- video_link_url = self._search_regex(
- r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
- video_link_url = self._search_regex(
- r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
-
- formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
-
- return {
- 'id': video_id,
- 'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
- 'formats': formats,
- 'subtitles': subtitles,
- 'upload_date': unified_strdate(self._search_regex(
- r'<span\s+class=["\']title-yakov["\']>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload_date', fatal=False)),
- 'description': self._html_search_regex(
- r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
- 'duration': parse_duration(self._search_regex(
- r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
- 'comment_count': int_or_none(self._search_regex(
- r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
- 'view_count': int_or_none(self._search_regex(
- r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
- 'age_limit': 18,
- }
-
- def _get_formats_and_subtitle(self, video_link_url, video_id):
- ext = determine_ext(video_link_url)
- if ext == 'm3u8':
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
- else:
- formats = [{'url': video_link_url, 'ext': ext}]
- subtitles = {}
-
- return formats, subtitles
diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py
index 51a9cf3..d711d3e 100644
--- a/yt_dlp/extractor/pornflip.py
+++ b/yt_dlp/extractor/pornflip.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_iso8601
-)
+from ..utils import int_or_none, parse_duration, parse_iso8601
class PornFlipIE(InfoExtractor):
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 29a3e43..d94f28c 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -97,7 +97,7 @@ class PornHubBaseIE(InfoExtractor):
login_form = self._hidden_inputs(login_page)
login_form.update({
- 'username': username,
+ 'email': username,
'password': password,
})
diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py
index 2e51b4f..b8e8701 100644
--- a/yt_dlp/extractor/pornovoisines.py
+++ b/yt_dlp/extractor/pornovoisines.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
unified_strdate,
)
diff --git a/yt_dlp/extractor/pr0gramm.py b/yt_dlp/extractor/pr0gramm.py
index 66f8a5f..3e0ccba 100644
--- a/yt_dlp/extractor/pr0gramm.py
+++ b/yt_dlp/extractor/pr0gramm.py
@@ -1,5 +1,6 @@
+import datetime as dt
import json
-from urllib.parse import unquote
+import urllib.parse
from .common import InfoExtractor
from ..compat import functools
@@ -114,7 +115,7 @@ class Pr0grammIE(InfoExtractor):
cookies = self._get_cookies(self.BASE_URL)
if 'me' not in cookies:
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
- if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
+ if traverse_obj(cookies, ('me', {lambda x: x.value}, {urllib.parse.unquote}, {json.loads}, 'verified')):
flags |= 0b00110
return flags
@@ -196,6 +197,7 @@ class Pr0grammIE(InfoExtractor):
'like_count': ('up', {int}),
'dislike_count': ('down', {int}),
'timestamp': ('created', {int}),
+ 'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
}),
}
diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py
index 46e2e8a..4c33bae 100644
--- a/yt_dlp/extractor/prosiebensat1.py
+++ b/yt_dlp/extractor/prosiebensat1.py
@@ -1,6 +1,6 @@
+import hashlib
import re
-from hashlib import sha1
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -42,7 +42,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
'Downloading protocols JSON',
headers=self.geo_verification_headers(), query={
'access_id': self._ACCESS_ID,
- 'client_token': sha1((raw_ct).encode()).hexdigest(),
+ 'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(),
'video_id': clip_id,
}, fatal=False, expected_status=(403,)) or {}
error = protocols.get('error') or {}
@@ -53,7 +53,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
urls = (self._download_json(
self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
'access_id': self._ACCESS_ID,
- 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+ 'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
'protocols': self._SUPPORTED_PROTOCOLS,
'server_token': server_token,
'video_id': clip_id,
@@ -77,7 +77,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
if not formats:
source_ids = [compat_str(source['id']) for source in video['sources']]
- client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+ client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
sources = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
@@ -96,7 +96,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
for source_id in source_ids:
- client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+ client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
urls = self._download_json(
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
clip_id, 'Downloading urls JSON', fatal=False, query={
diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py
index 5bb1832..338794e 100644
--- a/yt_dlp/extractor/prx.py
+++ b/yt_dlp/extractor/prx.py
@@ -1,14 +1,15 @@
import itertools
+
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
- urljoin,
- traverse_obj,
+ clean_html,
int_or_none,
mimetype2ext,
- clean_html,
- url_or_none,
- unified_timestamp,
str_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py
index 4b8e5e9..fc4c29e 100644
--- a/yt_dlp/extractor/puhutv.py
+++ b/yt_dlp/extractor/puhutv.py
@@ -3,8 +3,8 @@ from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
+ int_or_none,
parse_resolution,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py
index aa690d4..cb00de2 100644
--- a/yt_dlp/extractor/qingting.py
+++ b/yt_dlp/extractor/qingting.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import traverse_obj
diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py
index 9285825..90141e6 100644
--- a/yt_dlp/extractor/qqmusic.py
+++ b/yt_dlp/extractor/qqmusic.py
@@ -4,8 +4,8 @@ import time
from .common import InfoExtractor
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
strip_jsonp,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py
index 1a5a635..4a09dcd 100644
--- a/yt_dlp/extractor/radiocanada.py
+++ b/yt_dlp/extractor/radiocanada.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
unified_strdate,
)
diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py
index 38f8cf7..0c21977 100644
--- a/yt_dlp/extractor/radiocomercial.py
+++ b/yt_dlp/extractor/radiocomercial.py
@@ -14,7 +14,7 @@ from ..utils import (
try_call,
unified_strdate,
update_url,
- urljoin
+ urljoin,
)
from ..utils.traversal import traverse_obj
diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py
index 8f9737a..5d7d3dd 100644
--- a/yt_dlp/extractor/radiokapital.py
+++ b/yt_dlp/extractor/radiokapital.py
@@ -1,18 +1,14 @@
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- traverse_obj,
- unescapeHTML,
-)
-
import itertools
-from urllib.parse import urlencode
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import clean_html, traverse_obj, unescapeHTML
class RadioKapitalBaseIE(InfoExtractor):
def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}):
return self._download_json(
- f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}',
+ f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urllib.parse.urlencode(qs)}',
video_id, note=note)
def _parse_episode(self, data):
diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py
index 6752017..632c8c2 100644
--- a/yt_dlp/extractor/radiozet.py
+++ b/yt_dlp/extractor/radiozet.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- traverse_obj,
strip_or_none,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
index 3c00183..325e278 100644
--- a/yt_dlp/extractor/radlive.py
+++ b/yt_dlp/extractor/radlive.py
@@ -1,13 +1,13 @@
import json
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
format_field,
traverse_obj,
try_get,
- unified_timestamp
+ unified_timestamp,
)
-from .common import InfoExtractor
class RadLiveIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index c1fc65c..c2e7a6f 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -3,11 +3,11 @@ import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
clean_html,
determine_ext,
- ExtractorError,
filter_dict,
- GeoRestrictedError,
int_or_none,
join_nonempty,
parse_duration,
diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py
index 54f194c..5f2d0c1 100644
--- a/yt_dlp/extractor/rbgtum.py
+++ b/yt_dlp/extractor/rbgtum.py
@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
-from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
+from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj
class RbgTumIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
index 6a7c7f3..9c382e2 100644
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -5,11 +5,11 @@ import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
strip_or_none,
traverse_obj,
- try_get
+ try_get,
)
diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py
index 1a1c663..cc76b89 100644
--- a/yt_dlp/extractor/rds.py
+++ b/yt_dlp/extractor/rds.py
@@ -1,10 +1,10 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ js_to_json,
parse_duration,
parse_iso8601,
- js_to_json,
)
-from ..compat import compat_str
class RDSIE(InfoExtractor):
diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py
index d1de249..fac51b9 100644
--- a/yt_dlp/extractor/redbulltv.py
+++ b/yt_dlp/extractor/redbulltv.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- float_or_none,
ExtractorError,
+ float_or_none,
)
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 62f669f..bc3e5f7 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -5,11 +5,13 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ parse_qs,
traverse_obj,
try_get,
unescapeHTML,
- urlencode_postdata,
+ update_url_query,
url_or_none,
+ urlencode_postdata,
)
@@ -76,7 +78,7 @@ class RedditIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'comment_count': int,
- 'age_limit': 0,
+ 'age_limit': 18,
'channel_id': 'u_creepyt0es',
},
'params': {
@@ -151,6 +153,51 @@ class RedditIE(InfoExtractor):
},
'skip': 'Requires account that has opted-in to the GenZedong subreddit',
}, {
+ # subtitles in HLS manifest
+ 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/',
+ 'info_dict': {
+ 'id': 'a2mdj5d57qyc1',
+ 'ext': 'mp4',
+ 'display_id': '1cl9h0u',
+ 'title': 'The insurance claim will be interesting',
+ 'uploader': 'darrenpauli',
+ 'channel_id': 'Unexpected',
+ 'duration': 53,
+ 'upload_date': '20240506',
+ 'timestamp': 1714966382,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # subtitles from caption-url
+ 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/',
+ 'info_dict': {
+ 'id': 'xbmj4t3igy1d1',
+ 'ext': 'mp4',
+ 'display_id': '1cxwzso',
+ 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'',
+ 'uploader': 'Woodstovia',
+ 'channel_id': 'soccer',
+ 'duration': 30,
+ 'upload_date': '20240522',
+ 'timestamp': 1716373798,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ 'writesubtitles': True,
+ },
+ }, {
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
'only_matching': True,
}, {
@@ -197,6 +244,12 @@ class RedditIE(InfoExtractor):
elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
raise ExtractorError('Unable to login, no cookie was returned')
+ def _get_subtitles(self, video_id):
+ # Fallback if there were no subtitles provided by DASH or HLS manifests
+ caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt'
+ if self._is_valid_url(caption_url, video_id, item='subtitles'):
+ return {'en': [{'url': caption_url}]}
+
def _real_extract(self, url):
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
@@ -307,6 +360,10 @@ class RedditIE(InfoExtractor):
dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
+ qs = traverse_obj(parse_qs(hls_playlist_url), {
+ 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}),
+ })
+ hls_playlist_url = update_url_query(hls_playlist_url, qs)
formats = [{
'url': unescapeHTML(reddit_video['fallback_url']),
@@ -332,7 +389,7 @@ class RedditIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles,
+ 'subtitles': subtitles or self.extract_subtitles(video_id),
'duration': int_or_none(reddit_video.get('duration')),
}
diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py
index f945320..d0546bb 100644
--- a/yt_dlp/extractor/redgifs.py
+++ b/yt_dlp/extractor/redgifs.py
@@ -5,10 +5,10 @@ from ..compat import compat_parse_qs
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
+ OnDemandPagedList,
int_or_none,
qualities,
try_get,
- OnDemandPagedList,
)
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 965abbe..14ed0ed 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
merge_dicts,
str_to_int,
diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py
index 0a8f13b..9c9bac6 100644
--- a/yt_dlp/extractor/reuters.py
+++ b/yt_dlp/extractor/reuters.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- js_to_json,
int_or_none,
+ js_to_json,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py
index 8d29b30..bc59ed0 100644
--- a/yt_dlp/extractor/rmcdecouverte.py
+++ b/yt_dlp/extractor/rmcdecouverte.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
+from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index 5099f3a..3bc5f3c 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -1,8 +1,8 @@
+import datetime as dt
import itertools
import json
import re
import urllib.parse
-from datetime import datetime
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
@@ -156,7 +156,7 @@ class RokfinIE(InfoExtractor):
self.raise_login_required('This video is only available to premium users', True, method='cookies')
elif scheduled:
self.raise_no_formats(
- f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
+ f'Stream is offline; scheduled for {dt.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
video_id=video_id, expected=True)
uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py
index 7ba80d4..729804d 100644
--- a/yt_dlp/extractor/rte.py
+++ b/yt_dlp/extractor/rte.py
@@ -3,13 +3,13 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
float_or_none,
parse_iso8601,
str_or_none,
try_get,
unescapeHTML,
url_or_none,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py
index 5928a20..ec78d0a 100644
--- a/yt_dlp/extractor/rtp.py
+++ b/yt_dlp/extractor/rtp.py
@@ -1,9 +1,10 @@
-from .common import InfoExtractor
-from ..utils import js_to_json
-import re
+import base64
import json
+import re
import urllib.parse
-import base64
+
+from .common import InfoExtractor
+from ..utils import js_to_json
class RTPIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py
index 741c472..e7dcd5f 100644
--- a/yt_dlp/extractor/rtvcplay.py
+++ b/yt_dlp/extractor/rtvcplay.py
@@ -1,16 +1,17 @@
import re
-from .common import InfoExtractor, ExtractorError
+from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- int_or_none,
float_or_none,
+ int_or_none,
js_to_json,
mimetype2ext,
traverse_obj,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index a84a78d..defb8d7 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
parse_duration,
traverse_obj,
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index 287824d..eb12f32 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -5,8 +5,8 @@ from ..compat import (
compat_str,
)
from ..utils import (
- determine_ext,
bool_or_none,
+ determine_ext,
int_or_none,
parse_qs,
try_get,
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
index d7f9a73..726d491 100644
--- a/yt_dlp/extractor/rutv.py
+++ b/yt_dlp/extractor/rutv.py
@@ -1,11 +1,7 @@
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- str_to_int
-)
+from ..utils import ExtractorError, int_or_none, str_to_int
class RUTVIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
index 33f6652..dc61387 100644
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@@ -4,8 +4,8 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
find_xpath_attr,
int_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py
index 8d322d7..17dff0a 100644
--- a/yt_dlp/extractor/safari.py
+++ b/yt_dlp/extractor/safari.py
@@ -2,7 +2,6 @@ import json
import re
from .common import InfoExtractor
-
from ..compat import (
compat_parse_qs,
compat_urlparse,
diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py
index 3912f77..85d51cd 100644
--- a/yt_dlp/extractor/scrippsnetworks.py
+++ b/yt_dlp/extractor/scrippsnetworks.py
@@ -1,8 +1,8 @@
-import json
import hashlib
+import json
-from .aws import AWSIE
from .anvato import AnvatoIE
+from .aws import AWSIE
from .common import InfoExtractor
from ..utils import (
smuggle_url,
diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py
index 9c2ca8c..fc91d60 100644
--- a/yt_dlp/extractor/scte.py
+++ b/yt_dlp/extractor/scte.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- decode_packed_codes,
ExtractorError,
+ decode_packed_codes,
urlencode_postdata,
)
diff --git a/yt_dlp/extractor/sejmpl.py b/yt_dlp/extractor/sejmpl.py
index 29cb015..eb433d2 100644
--- a/yt_dlp/extractor/sejmpl.py
+++ b/yt_dlp/extractor/sejmpl.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
from .common import InfoExtractor
from .redge import RedCDNLivxIE
@@ -13,16 +13,16 @@ from ..utils.traversal import traverse_obj
def is_dst(date):
- last_march = datetime.datetime(date.year, 3, 31)
- last_october = datetime.datetime(date.year, 10, 31)
- last_sunday_march = last_march - datetime.timedelta(days=last_march.isoweekday() % 7)
- last_sunday_october = last_october - datetime.timedelta(days=last_october.isoweekday() % 7)
+ last_march = dt.datetime(date.year, 3, 31)
+ last_october = dt.datetime(date.year, 10, 31)
+ last_sunday_march = last_march - dt.timedelta(days=last_march.isoweekday() % 7)
+ last_sunday_october = last_october - dt.timedelta(days=last_october.isoweekday() % 7)
return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)
def rfc3339_to_atende(date):
- date = datetime.datetime.fromisoformat(date)
- date = date + datetime.timedelta(hours=1 if is_dst(date) else 0)
+ date = dt.datetime.fromisoformat(date)
+ date = date + dt.timedelta(hours=1 if is_dst(date) else 0)
return int((date.timestamp() - 978307200) * 1000)
diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py
index 1ecea71..99fcf51 100644
--- a/yt_dlp/extractor/sendtonews.py
+++ b/yt_dlp/extractor/sendtonews.py
@@ -2,12 +2,12 @@ import re
from .common import InfoExtractor
from ..utils import (
+ determine_protocol,
float_or_none,
- parse_iso8601,
- update_url_query,
int_or_none,
- determine_protocol,
+ parse_iso8601,
unescapeHTML,
+ update_url_query,
)
diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py
index 79e8885..b31d566 100644
--- a/yt_dlp/extractor/seznamzpravy.py
+++ b/yt_dlp/extractor/seznamzpravy.py
@@ -4,11 +4,11 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
- urljoin,
int_or_none,
parse_codecs,
parse_qs,
try_get,
+ urljoin,
)
diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py
index d509e88..89aee27 100644
--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@@ -5,9 +5,9 @@ import re
from .aws import AWSIE
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
InAdvancePagedList,
+ clean_html,
int_or_none,
parse_iso8601,
str_or_none,
diff --git a/yt_dlp/extractor/sharepoint.py b/yt_dlp/extractor/sharepoint.py
new file mode 100644
index 0000000..d4d5af0
--- /dev/null
+++ b/yt_dlp/extractor/sharepoint.py
@@ -0,0 +1,112 @@
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import determine_ext, int_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class SharePointIE(InfoExtractor):
+ _BASE_URL_RE = r'https?://[\w-]+\.sharepoint\.com/'
+ _VALID_URL = [
+ rf'{_BASE_URL_RE}:v:/[a-z]/(?:[^/?#]+/)*(?P<id>[^/?#]{{46}})/?(?:$|[?#])',
+ rf'{_BASE_URL_RE}(?!:v:)(?:[^/?#]+/)*stream\.aspx\?(?:[^#]+&)?id=(?P<id>[^&#]+)',
+ ]
+ _TESTS = [{
+ 'url': 'https://lut-my.sharepoint.com/:v:/g/personal/juha_eerola_student_lab_fi/EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw?e=ZpQOOw',
+ 'md5': '2950821d0d4937a0a76373782093b435',
+ 'info_dict': {
+ 'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
+ 'display_id': 'EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw',
+ 'ext': 'mp4',
+ 'title': 'CmvpJST',
+ 'duration': 54.567,
+ 'thumbnail': r're:https://.+/thumbnail',
+ 'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
+ },
+ }, {
+ 'url': 'https://greaternyace.sharepoint.com/:v:/s/acementornydrive/ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg?e=PQUfVb',
+ 'md5': 'c496a01644223273bff12e93e501afd1',
+ 'info_dict': {
+ 'id': '01QI4AVTZ3ESFZPAD42VCKB5CZKAGLFVYB',
+ 'display_id': 'ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg',
+ 'ext': 'mp4',
+ 'title': '930103681233985536',
+ 'duration': 3797.326,
+ 'thumbnail': r're:https://.+/thumbnail',
+ },
+ }, {
+ 'url': 'https://lut-my.sharepoint.com/personal/juha_eerola_student_lab_fi/_layouts/15/stream.aspx?id=%2Fpersonal%2Fjuha_eerola_student_lab_fi%2FDocuments%2FM-DL%2FCmvpJST.mp4&ga=1&referrer=StreamWebApp.Web&referrerScenario=AddressBarCopied.view',
+ 'info_dict': {
+ 'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
+ 'display_id': '/personal/juha_eerola_student_lab_fi/Documents/M-DL/CmvpJST.mp4',
+ 'ext': 'mp4',
+ 'title': 'CmvpJST',
+ 'duration': 54.567,
+ 'thumbnail': r're:https://.+/thumbnail',
+ 'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
+ },
+ 'skip': 'Session cookies needed',
+ }, {
+ 'url': 'https://izoobasisschool.sharepoint.com/:v:/g/Eaqleq8COVBIvIPvod0U27oBypC6aWOkk8ptuDpmJ6arHw',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://uskudaredutr-my.sharepoint.com/:v:/g/personal/songul_turkaydin_uskudar_edu_tr/EbTf-VRUIbtGuIN73tx1MuwBCHBOmNcWNqSLw61Fd2_o0g?e=n5Vkof',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://epam-my.sharepoint.com/:v:/p/dzmitry_tamashevich/Ec4ZOs-rATZHjFYZWVxjczEB649FCoYFKDV_x3RxZiWAGA?e=4hswgA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://microsoft.sharepoint.com/:v:/t/MicrosoftSPARKRecordings-MSFTInternal/EWCyeqByVWBAt8wDvNZdV-UB0BvU5YVbKm0UHgdrUlI6dg?e=QbPck6',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = urllib.parse.unquote(self._match_id(url))
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+ if urllib.parse.urlparse(urlh.url).hostname == 'login.microsoftonline.com':
+ self.raise_login_required(
+ 'Session cookies are required for this URL and can be passed '
+ 'with the --cookies option. The --cookies-from-browser option will not work', method=None)
+
+ video_data = self._search_json(r'g_fileInfo\s*=', webpage, 'player config', display_id)
+ video_id = video_data['VroomItemId']
+
+ parsed_url = urllib.parse.urlparse(video_data['.transformUrl'])
+ base_media_url = urllib.parse.urlunparse(parsed_url._replace(
+ path=urllib.parse.urljoin(f'{parsed_url.path}/', '../videomanifest'),
+ query=urllib.parse.urlencode({
+ **urllib.parse.parse_qs(parsed_url.query),
+ 'cTag': video_data['.ctag'],
+ 'action': 'Access',
+ 'part': 'index',
+ }, doseq=True)))
+
+ # Web player adds more params to the format URLs but we still get all formats without them
+ formats = self._extract_mpd_formats(
+ base_media_url, video_id, mpd_id='dash', query={'format': 'dash'}, fatal=False)
+ for hls_type in ('hls', 'hls-vnext'):
+ formats.extend(self._extract_m3u8_formats(
+ base_media_url, video_id, 'mp4', m3u8_id=hls_type,
+ query={'format': hls_type}, fatal=False, quality=-2))
+
+ if video_url := traverse_obj(video_data, ('downloadUrl', {url_or_none})):
+ formats.append({
+ 'url': video_url,
+ 'ext': determine_ext(video_data.get('extension') or video_data.get('name')),
+ 'quality': 1,
+ 'format_id': 'source',
+ 'filesize': int_or_none(video_data.get('size')),
+ 'vcodec': 'none' if video_data.get('isAudio') is True else None,
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_data.get('title') or video_data.get('displayName'),
+ 'display_id': display_id,
+ 'uploader_id': video_data.get('authorId'),
+ 'duration': traverse_obj(video_data, (
+ 'MediaServiceFastMetadata', {json.loads}, 'media', 'duration', {lambda x: x / 10000000})),
+ 'thumbnail': url_or_none(video_data.get('thumbnailUrl')),
+ }
diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
index ec9938b..cca86ed 100644
--- a/yt_dlp/extractor/shemaroome.py
+++ b/yt_dlp/extractor/shemaroome.py
@@ -4,8 +4,8 @@ from ..compat import (
compat_b64decode,
)
from ..utils import (
- bytes_to_intlist,
ExtractorError,
+ bytes_to_intlist,
intlist_to_bytes,
unified_strdate,
)
diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py
index ef93b92..44619a1 100644
--- a/yt_dlp/extractor/sixplay.py
+++ b/yt_dlp/extractor/sixplay.py
@@ -6,8 +6,8 @@ from ..utils import (
determine_ext,
int_or_none,
parse_qs,
- try_get,
qualities,
+ try_get,
)
diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py
index 8677827..234703c 100644
--- a/yt_dlp/extractor/skynewsarabia.py
+++ b/yt_dlp/extractor/skynewsarabia.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- parse_iso8601,
parse_duration,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py
index c0ff4f9..a41ad30 100644
--- a/yt_dlp/extractor/sohu.py
+++ b/yt_dlp/extractor/sohu.py
@@ -8,13 +8,13 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
- url_or_none,
- unified_timestamp,
+ int_or_none,
+ traverse_obj,
try_get,
+ unified_timestamp,
+ url_or_none,
urljoin,
- traverse_obj,
)
diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py
index 4379572..7c914ac 100644
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@@ -1,4 +1,5 @@
-import datetime
+import datetime as dt
+import itertools
import json
import math
import random
@@ -12,8 +13,8 @@ from ..utils import (
int_or_none,
jwt_decode_hs256,
try_call,
- try_get,
)
+from ..utils.traversal import traverse_obj
class SonyLIVIE(InfoExtractor):
@@ -93,7 +94,7 @@ class SonyLIVIE(InfoExtractor):
'mobileNumber': username,
'channelPartnerID': 'MSMIND',
'country': 'IN',
- 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+ 'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
'otpSize': 6,
'loginType': 'REGISTERORSIGNIN',
'isMobileMandatory': True,
@@ -110,7 +111,7 @@ class SonyLIVIE(InfoExtractor):
'otp': self._get_tfa_info('OTP'),
'dmaId': 'IN',
'ageConfirmation': True,
- 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+ 'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
'isMobileMandatory': True,
}).encode())
if otp_verify_json['resultCode'] == 'KO':
@@ -183,17 +184,21 @@ class SonyLIVIE(InfoExtractor):
class SonyLIVSeriesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})$'
+ _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})/?(?:$|[?#])'
_TESTS = [{
'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
- 'playlist_mincount': 456,
+ 'playlist_mincount': 452,
'info_dict': {
'id': '1700000091',
},
+ }, {
+ 'url': 'https://www.sonyliv.com/shows/beyhadh-1700000007/',
+ 'playlist_mincount': 358,
+ 'info_dict': {
+ 'id': '1700000007',
+ },
}]
- _API_SHOW_URL = "https://apiv2.sonyliv.com/AGL/1.9/R/ENG/WEB/IN/DL/DETAIL/{}?kids_safe=false&from=0&to=49"
- _API_EPISODES_URL = "https://apiv2.sonyliv.com/AGL/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{}?from=0&to=1000&orderBy=episodeNumber&sortOrder=asc"
- _API_SECURITY_URL = 'https://apiv2.sonyliv.com/AGL/1.4/A/ENG/WEB/ALL/GETTOKEN'
+ _API_BASE = 'https://apiv2.sonyliv.com/AGL'
def _entries(self, show_id):
headers = {
@@ -201,19 +206,34 @@ class SonyLIVSeriesIE(InfoExtractor):
'Referer': 'https://www.sonyliv.com',
}
headers['security_token'] = self._download_json(
- self._API_SECURITY_URL, video_id=show_id, headers=headers,
- note='Downloading security token')['resultObj']
- seasons = try_get(
- self._download_json(self._API_SHOW_URL.format(show_id), video_id=show_id, headers=headers),
- lambda x: x['resultObj']['containers'][0]['containers'], list)
- for season in seasons or []:
- season_id = season['id']
- episodes = try_get(
- self._download_json(self._API_EPISODES_URL.format(season_id), video_id=season_id, headers=headers),
- lambda x: x['resultObj']['containers'][0]['containers'], list)
- for episode in episodes or []:
- video_id = episode.get('id')
- yield self.url_result('sonyliv:%s' % video_id, ie=SonyLIVIE.ie_key(), video_id=video_id)
+ f'{self._API_BASE}/1.4/A/ENG/WEB/ALL/GETTOKEN', show_id,
+ 'Downloading security token', headers=headers)['resultObj']
+ seasons = traverse_obj(self._download_json(
+ f'{self._API_BASE}/1.9/R/ENG/WEB/IN/DL/DETAIL/{show_id}', show_id,
+ 'Downloading series JSON', headers=headers, query={
+ 'kids_safe': 'false',
+ 'from': '0',
+ 'to': '49',
+ }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
+ for season in seasons:
+ season_id = str(season['id'])
+ note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
+ cursor = 0
+ for page_num in itertools.count(1):
+ episodes = traverse_obj(self._download_json(
+ f'{self._API_BASE}/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{season_id}',
+ season_id, f'Downloading {note} page {page_num} JSON', headers=headers, query={
+ 'from': str(cursor),
+ 'to': str(cursor + 99),
+ 'orderBy': 'episodeNumber',
+ 'sortOrder': 'asc',
+ }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
+ if not episodes:
+ break
+ for episode in episodes:
+ video_id = str(episode['id'])
+ yield self.url_result(f'sonyliv:{video_id}', SonyLIVIE, video_id)
+ cursor += 100
def _real_extract(self, url):
show_id = self._match_id(url)
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index a7c2afd..3581461 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -1,30 +1,29 @@
+import functools
import itertools
-import re
import json
-# import random
+import re
-from .common import (
- InfoExtractor,
- SearchInfoExtractor
-)
+from .common import InfoExtractor, SearchInfoExtractor
from ..compat import compat_str
-from ..networking import HEADRequest, Request
+from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
- error_to_compat_str,
+ KNOWN_EXTENSIONS,
ExtractorError,
+ error_to_compat_str,
float_or_none,
int_or_none,
- KNOWN_EXTENSIONS,
+ join_nonempty,
mimetype2ext,
parse_qs,
str_or_none,
- try_get,
+ try_call,
unified_timestamp,
update_url_query,
url_or_none,
urlhandle_detect_ext,
)
+from ..utils.traversal import traverse_obj
class SoundcloudEmbedIE(InfoExtractor):
@@ -54,7 +53,6 @@ class SoundcloudBaseIE(InfoExtractor):
_API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
- _access_token = None
_HEADERS = {}
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
@@ -72,6 +70,16 @@ class SoundcloudBaseIE(InfoExtractor):
'original': 0,
}
+ _DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3']
+
+ @functools.cached_property
+ def _is_requested(self):
+ return re.compile(r'|'.join(set(
+ re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default'
+ else '|'.join(map(re.escape, self._DEFAULT_FORMATS))
+ for pattern in self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE)
+ ))).fullmatch
+
def _store_client_id(self, client_id):
self.cache.store('soundcloud', 'client_id', client_id)
@@ -112,21 +120,31 @@ class SoundcloudBaseIE(InfoExtractor):
def _initialize_pre_login(self):
self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
+ def _verify_oauth_token(self, token):
+ if self._request_webpage(
+ self._API_VERIFY_AUTH_TOKEN % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
+ None, note='Verifying login token...', fatal=False,
+ data=json.dumps({'session': {'access_token': token}}).encode()):
+ self._HEADERS['Authorization'] = f'OAuth {token}'
+ self.report_login()
+ else:
+ self.report_warning('Provided authorization token is invalid. Continuing as guest')
+
+ def _real_initialize(self):
+ if self._HEADERS:
+ return
+ if token := try_call(lambda: self._get_cookies(self._BASE_URL)['oauth_token'].value):
+ self._verify_oauth_token(token)
+
def _perform_login(self, username, password):
if username != 'oauth':
- self.report_warning(
+ raise ExtractorError(
'Login using username and password is not currently supported. '
- 'Use "--username oauth --password <oauth_token>" to login using an oauth token')
- self._access_token = password
- query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
- payload = {'session': {'access_token': self._access_token}}
- token_verification = Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
- response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
- if response is not False:
- self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
- self.report_login()
- else:
- self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
+ 'Use "--username oauth --password <oauth_token>" to login using an oauth token, '
+ f'or else {self._login_hint(method="cookies")}', expected=True)
+ if self._HEADERS:
+ return
+ self._verify_oauth_token(password)
r'''
def genDevId():
@@ -147,14 +165,17 @@ class SoundcloudBaseIE(InfoExtractor):
'user_agent': self._USER_AGENT
}
- query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
- login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
- response = self._download_json(login, None)
- self._access_token = response.get('session').get('access_token')
- if not self._access_token:
- self.report_warning('Unable to get access token, login may has failed')
- else:
- self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ response = self._download_json(
+ self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
+ None, note='Verifying login token...', fatal=False,
+ data=json.dumps(payload).encode())
+
+ if token := traverse_obj(response, ('session', 'access_token', {str})):
+ self._HEADERS['Authorization'] = f'OAuth {token}'
+ self.report_login()
+ return
+
+ raise ExtractorError('Unable to get access token, login may have failed', expected=True)
'''
# signature generation
@@ -207,7 +228,7 @@ class SoundcloudBaseIE(InfoExtractor):
redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
if redirect_url:
urlh = self._request_webpage(
- HEADRequest(redirect_url), track_id, fatal=False)
+ HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False)
if urlh:
format_url = urlh.url
format_urls.add(format_url)
@@ -217,6 +238,7 @@ class SoundcloudBaseIE(InfoExtractor):
'filesize': int_or_none(urlh.headers.get('Content-Length')),
'url': format_url,
'quality': 10,
+ 'format_note': 'Original',
})
def invalid_url(url):
@@ -233,9 +255,13 @@ class SoundcloudBaseIE(InfoExtractor):
format_id_list.append(protocol)
ext = f.get('ext')
if ext == 'aac':
- f['abr'] = '256'
+ f.update({
+ 'abr': 256,
+ 'quality': 5,
+ 'format_note': 'Premium',
+ })
for k in ('ext', 'abr'):
- v = f.get(k)
+ v = str_or_none(f.get(k))
if v:
format_id_list.append(v)
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
@@ -244,7 +270,7 @@ class SoundcloudBaseIE(InfoExtractor):
abr = f.get('abr')
if abr:
f['abr'] = int(abr)
- if protocol == 'hls':
+ if protocol in ('hls', 'hls-aes'):
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
else:
protocol = 'http'
@@ -256,37 +282,54 @@ class SoundcloudBaseIE(InfoExtractor):
formats.append(f)
# New API
- transcodings = try_get(
- info, lambda x: x['media']['transcodings'], list) or []
- for t in transcodings:
- if not isinstance(t, dict):
- continue
- format_url = url_or_none(t.get('url'))
- if not format_url:
- continue
- stream = None if extract_flat else self._download_json(
- format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
- if not isinstance(stream, dict):
- continue
- stream_url = url_or_none(stream.get('url'))
- if invalid_url(stream_url):
- continue
- format_urls.add(stream_url)
- stream_format = t.get('format') or {}
- protocol = stream_format.get('protocol')
+ for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']))):
+ if extract_flat:
+ break
+ format_url = t['url']
+
+ protocol = traverse_obj(t, ('format', 'protocol', {str}))
+ if protocol == 'progressive':
+ protocol = 'http'
if protocol != 'hls' and '/hls' in format_url:
protocol = 'hls'
+ if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url:
+ protocol = 'hls-aes'
+
ext = None
- preset = str_or_none(t.get('preset'))
- if preset:
+ if preset := traverse_obj(t, ('preset', {str_or_none})):
ext = preset.split('_')[0]
if ext not in KNOWN_EXTENSIONS:
- ext = mimetype2ext(stream_format.get('mime_type'))
+ ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str})))
+
+ identifier = join_nonempty(protocol, ext, delim='_')
+ if not self._is_requested(identifier):
+ self.write_debug(f'"{identifier}" is not a requested format, skipping')
+ continue
+
+ stream = None
+ for retry in self.RetryManager(fatal=False):
+ try:
+ stream = self._download_json(
+ format_url, track_id, f'Downloading {identifier} format info JSON',
+ query=query, headers=self._HEADERS)
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 429:
+ self.report_warning(
+ 'You have reached the API rate limit, which is ~600 requests per '
+ '10 minutes. Use the --extractor-retries and --retry-sleep options '
+ 'to configure an appropriate retry count and wait time', only_once=True)
+ retry.error = e.cause
+ else:
+ self.report_warning(e.msg)
+
+ stream_url = traverse_obj(stream, ('url', {url_or_none}))
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
add_format({
'url': stream_url,
'ext': ext,
- }, 'http' if protocol == 'progressive' else protocol,
- t.get('snipped') or '/preview/' in format_url)
+ }, protocol, t.get('snipped') or '/preview/' in format_url)
for f in formats:
f['vcodec'] = 'none'
@@ -338,7 +381,7 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
- 'genre': info.get('genre'),
+ 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
'formats': formats if not extract_flat else None
}
@@ -372,10 +415,10 @@ class SoundcloudIE(SoundcloudBaseIE):
_TESTS = [
{
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
- 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+ 'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
'info_dict': {
'id': '62986583',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music',
@@ -388,6 +431,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
+ 'uploader_url': 'https://soundcloud.com/ethmusic',
+ 'genres': [],
}
},
# geo-restricted
@@ -395,7 +441,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
'id': '47127627',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
@@ -408,6 +454,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/the-concept-band',
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
+ 'genres': ['Alternative'],
},
},
# private link
@@ -429,6 +478,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/jaimemf',
+ 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+ 'genres': ['youtubedl'],
},
},
# private link (alt format)
@@ -450,6 +502,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/jaimemf',
+ 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+ 'genres': ['youtubedl'],
},
},
# downloadable song
@@ -459,6 +514,21 @@ class SoundcloudIE(SoundcloudBaseIE):
'info_dict': {
'id': '343609555',
'ext': 'wav',
+ 'title': 'The Following',
+ 'description': '',
+ 'uploader': '80M',
+ 'uploader_id': '312384765',
+ 'uploader_url': 'https://soundcloud.com/the80m',
+ 'upload_date': '20170922',
+ 'timestamp': 1506120436,
+ 'duration': 397.228,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
+ 'license': 'all-rights-reserved',
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'view_count': int,
+ 'genres': ['Dance & EDM'],
},
},
# private link, downloadable format
@@ -480,6 +550,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
+ 'uploader_url': 'https://soundcloud.com/oriuplift',
+ 'genres': ['Trance'],
},
},
# no album art, use avatar pic for thumbnail
@@ -502,6 +575,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/garyvee',
+ 'genres': [],
},
'params': {
'skip_download': True,
@@ -509,13 +584,13 @@ class SoundcloudIE(SoundcloudBaseIE):
},
{
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
- 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'md5': '8227c3473a4264df6b02ad7e5b7527ac',
'info_dict': {
'id': '583011102',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Mezzo Valzer',
- 'description': 'md5:4138d582f81866a530317bae316e8b61',
- 'uploader': 'Micronie',
+ 'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
+ 'uploader': 'Giovanni Sarani',
'uploader_id': '3352531',
'timestamp': 1551394171,
'upload_date': '20190228',
@@ -526,6 +601,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'genres': ['Piano'],
+ 'uploader_url': 'https://soundcloud.com/giovannisarani',
},
},
{
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 493eea2..773ddd3 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- try_get,
- unified_timestamp
-)
+from ..utils import try_get, unified_timestamp
class SovietsClosetBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
index 43da34a..c73f797 100644
--- a/yt_dlp/extractor/spankbang.py
+++ b/yt_dlp/extractor/spankbang.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
merge_dicts,
parse_duration,
parse_resolution,
diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py
index a98584a..bdb8ef4 100644
--- a/yt_dlp/extractor/springboardplatform.py
+++ b/yt_dlp/extractor/springboardplatform.py
@@ -4,11 +4,11 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
- xpath_attr,
- xpath_text,
- xpath_element,
unescapeHTML,
unified_timestamp,
+ xpath_attr,
+ xpath_element,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/stacommu.py b/yt_dlp/extractor/stacommu.py
index 1308c59..d2f207f 100644
--- a/yt_dlp/extractor/stacommu.py
+++ b/yt_dlp/extractor/stacommu.py
@@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
- _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
IE_NAME = 'theatercomplextown:vod'
_TESTS = [{
'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
@@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
'only_matching': True,
+ }, {
+ 'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+ 'only_matching': True,
}]
_API_PATH = 'videoEpisodes'
@@ -204,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
- _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
IE_NAME = 'theatercomplextown:ppv'
_TESTS = [{
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
@@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
'only_matching': True,
+ }, {
+ 'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
+ 'only_matching': True,
}]
_API_PATH = 'events'
diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py
index bb6e8f1..312a4fd 100644
--- a/yt_dlp/extractor/startv.py
+++ b/yt_dlp/extractor/startv.py
@@ -3,10 +3,10 @@ from ..compat import (
compat_str,
)
from ..utils import (
- clean_html,
ExtractorError,
- traverse_obj,
+ clean_html,
int_or_none,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py
index 2fd200f..46a15e6 100644
--- a/yt_dlp/extractor/stitcher.py
+++ b/yt_dlp/extractor/stitcher.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
clean_html,
clean_podcast_url,
- ExtractorError,
int_or_none,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
index 566f777..20a70a7 100644
--- a/yt_dlp/extractor/storyfire.py
+++ b/yt_dlp/extractor/storyfire.py
@@ -2,9 +2,9 @@ import functools
from .common import InfoExtractor
from ..utils import (
+ OnDemandPagedList,
format_field,
int_or_none,
- OnDemandPagedList,
smuggle_url,
)
diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py
index 462861e..c303ac5 100644
--- a/yt_dlp/extractor/streamable.py
+++ b/yt_dlp/extractor/streamable.py
@@ -3,8 +3,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
- try_get,
parse_codecs,
+ try_get,
)
diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py
index b9523c8..a847925 100644
--- a/yt_dlp/extractor/stripchat.py
+++ b/yt_dlp/extractor/stripchat.py
@@ -3,7 +3,7 @@ from ..utils import (
ExtractorError,
UserNotLive,
lowercase_escape,
- traverse_obj
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
index 8b3e635..0ab7801 100644
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@@ -41,7 +41,7 @@ class STVPlayerIE(InfoExtractor):
ptype, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id, fatal=False) or ''
- props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
+ props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
player_api_cache = try_get(
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py
index 708873a..501156e 100644
--- a/yt_dlp/extractor/sunporno.py
+++ b/yt_dlp/extractor/sunporno.py
@@ -2,10 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- parse_duration,
+ determine_ext,
int_or_none,
+ parse_duration,
qualities,
- determine_ext,
)
diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py
index bd2d738..29e5e57 100644
--- a/yt_dlp/extractor/syfy.py
+++ b/yt_dlp/extractor/syfy.py
@@ -1,7 +1,7 @@
from .adobepass import AdobePassIE
from ..utils import (
- update_url_query,
smuggle_url,
+ update_url_query,
)
diff --git a/yt_dlp/extractor/taptap.py b/yt_dlp/extractor/taptap.py
new file mode 100644
index 0000000..56f2f0e
--- /dev/null
+++ b/yt_dlp/extractor/taptap.py
@@ -0,0 +1,275 @@
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ join_nonempty,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class TapTapBaseIE(InfoExtractor):
+ _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC'
+ _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get'
+ _INFO_API = None
+ _INFO_QUERY_KEY = 'id'
+ _DATA_PATH = None
+ _ID_PATH = None
+ _META_PATH = None
+
+ def _get_api(self, url, video_id, query, **kwargs):
+ query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())}
+ return self._download_json(url, video_id, query=query, **kwargs)['data']
+
+ def _extract_video(self, video_id):
+ video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0]
+
+ # h265 playlist contains both h265 and h264 formats
+ video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any))
+ formats = self._extract_m3u8_formats(video_url, video_id, fatal=False)
+ for format in formats:
+ if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')):
+ format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_')
+
+ return {
+ 'id': str(video_id),
+ 'formats': formats,
+ **traverse_obj(video_data, ({
+ 'duration': ('info', 'duration', {int_or_none}),
+ 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}),
+ }), get_all=False)
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = {self._INFO_QUERY_KEY: video_id}
+
+ data = traverse_obj(
+ self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH)
+
+ metainfo = traverse_obj(data, self._META_PATH)
+ entries = [{
+ **metainfo,
+ **self._extract_video(id)
+ } for id in set(traverse_obj(data, self._ID_PATH))]
+
+ return self.playlist_result(entries, **metainfo, id=video_id)
+
+
+class TapTapMomentIE(TapTapBaseIE):
+ _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail'
+ _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id')
+ _META_PATH = ('moment', {
+ 'timestamp': ('created_time', {int_or_none}),
+ 'modified_timestamp': ('edited_time', {int_or_none}),
+ 'uploader': ('author', 'user', 'name', {str}),
+ 'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}),
+ 'title': ('topic', 'title', {str}),
+ 'description': ('topic', 'summary', {str}),
+ })
+ _TESTS = [{
+ 'url': 'https://www.taptap.cn/moment/194618230982052443',
+ 'info_dict': {
+ 'id': '194618230982052443',
+ 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
+ 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
+ 'timestamp': 1633453402,
+ 'upload_date': '20211005',
+ 'modified_timestamp': 1633453402,
+ 'modified_date': '20211005',
+ 'uploader': '乌酱',
+ 'uploader_id': '532896',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2202584',
+ 'ext': 'mp4',
+ 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
+ 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
+ 'duration': 66,
+ 'timestamp': 1633453402,
+ 'upload_date': '20211005',
+ 'modified_timestamp': 1633453402,
+ 'modified_date': '20211005',
+ 'uploader': '乌酱',
+ 'uploader_id': '532896',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.taptap.cn/moment/521630629209573493',
+ 'info_dict': {
+ 'id': '521630629209573493',
+ 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
+ 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
+ 'timestamp': 1711425600,
+ 'upload_date': '20240326',
+ 'modified_timestamp': 1711425600,
+ 'modified_date': '20240326',
+ 'uploader': '崩坏:星穹铁道',
+ 'uploader_id': '414732580',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '4006511',
+ 'ext': 'mp4',
+ 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
+ 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
+ 'duration': 173,
+ 'timestamp': 1711425600,
+ 'upload_date': '20240326',
+ 'modified_timestamp': 1711425600,
+ 'modified_date': '20240326',
+ 'uploader': '崩坏:星穹铁道',
+ 'uploader_id': '414732580',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.taptap.cn/moment/540493587511511299',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': '540493587511511299',
+ 'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!',
+ 'description': 'md5:d60842350e686ddb242291ddfb8e39c9',
+ 'timestamp': 1715920200,
+ 'upload_date': '20240517',
+ 'modified_timestamp': 1715942225,
+ 'modified_date': '20240517',
+ 'uploader': 'TapTap 编辑',
+ 'uploader_id': '7159244',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapAppIE(TapTapBaseIE):
+ _VALID_URL = r'https?://www\.taptap\.cn/app/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail'
+ _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
+ _META_PATH = {
+ 'title': ('title', {str}),
+ 'description': ('description', 'text', {str}, {clean_html}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.cn/app/168332',
+ 'info_dict': {
+ 'id': '168332',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '4058443',
+ 'ext': 'mp4',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ 'duration': 26,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '4058462',
+ 'ext': 'mp4',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ 'duration': 295,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapIntlBase(TapTapBaseIE):
+ _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0'
+ _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get'
+
+
+class TapTapAppIntlIE(TapTapIntlBase):
+ _VALID_URL = r'https?://www\.taptap\.io/app/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail'
+ _DATA_PATH = 'app'
+ _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
+ _META_PATH = {
+ 'title': ('title', {str}),
+ 'description': ('description', 'text', {str}, {clean_html}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.io/app/233287',
+ 'info_dict': {
+ 'id': '233287',
+ 'title': '《虹彩六號 M》',
+ 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2149708997',
+ 'ext': 'mp4',
+ 'title': '《虹彩六號 M》',
+ 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
+ 'duration': 78,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapPostIntlIE(TapTapIntlBase):
+ _VALID_URL = r'https?://www\.taptap\.io/post/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail'
+ _INFO_QUERY_KEY = 'id_str'
+ _DATA_PATH = 'post'
+ _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id')
+ _META_PATH = {
+ 'timestamp': ('published_time', {int_or_none}),
+ 'modified_timestamp': ('edited_time', {int_or_none}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_id': ('user', 'id', {int}, {str_or_none}),
+ 'title': ('title', {str}),
+ 'description': ('list_fields', 'summary', {str}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.io/post/571785',
+ 'info_dict': {
+ 'id': '571785',
+ 'title': 'Arknights x Rainbow Six Siege | Event PV',
+ 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
+ 'timestamp': 1614664951,
+ 'upload_date': '20210302',
+ 'modified_timestamp': 1614664951,
+ 'modified_date': '20210302',
+ 'uploader': 'TapTap Editor',
+ 'uploader_id': '80224473',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2149491903',
+ 'ext': 'mp4',
+ 'title': 'Arknights x Rainbow Six Siege | Event PV',
+ 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
+ 'duration': 122,
+ 'timestamp': 1614664951,
+ 'upload_date': '20210302',
+ 'modified_timestamp': 1614664951,
+ 'modified_date': '20210302',
+ 'uploader': 'TapTap Editor',
+ 'uploader_id': '80224473',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py
index 808c6c7..4e17859 100644
--- a/yt_dlp/extractor/tbs.py
+++ b/yt_dlp/extractor/tbs.py
@@ -2,8 +2,8 @@ import re
from .turner import TurnerBaseIE
from ..compat import (
- compat_urllib_parse_urlparse,
compat_parse_qs,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
float_or_none,
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
index 5eac9aa..778fa12 100644
--- a/yt_dlp/extractor/teachable.py
+++ b/yt_dlp/extractor/teachable.py
@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from .wistia import WistiaIE
from ..utils import (
- clean_html,
ExtractorError,
- int_or_none,
+ clean_html,
get_element_by_class,
+ int_or_none,
strip_or_none,
urlencode_postdata,
urljoin,
diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py
index 90a9762..7402409 100644
--- a/yt_dlp/extractor/teachertube.py
+++ b/yt_dlp/extractor/teachertube.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
qualities,
)
diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py
index d32f812..3fb899c 100644
--- a/yt_dlp/extractor/teamcoco.py
+++ b/yt_dlp/extractor/teamcoco.py
@@ -13,8 +13,8 @@ from ..utils import (
parse_qs,
traverse_obj,
unified_timestamp,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py
index dd802db..ba25cdc 100644
--- a/yt_dlp/extractor/teamtreehouse.py
+++ b/yt_dlp/extractor/teamtreehouse.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
float_or_none,
get_element_by_class,
get_element_by_id,
diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py
index c28a154..0969bbb 100644
--- a/yt_dlp/extractor/ted.py
+++ b/yt_dlp/extractor/ted.py
@@ -2,14 +2,13 @@ import itertools
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
+ parse_duration,
str_to_int,
try_get,
- url_or_none,
unified_strdate,
- parse_duration,
+ url_or_none,
)
diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py
index 212af37..1705c2d 100644
--- a/yt_dlp/extractor/tele13.py
+++ b/yt_dlp/extractor/tele13.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
+ determine_ext,
js_to_json,
qualities,
- determine_ext,
)
diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py
index 72f67e4..a455375 100644
--- a/yt_dlp/extractor/tele5.py
+++ b/yt_dlp/extractor/tele5.py
@@ -1,89 +1,77 @@
-from .dplay import DPlayIE
-from ..compat import compat_urlparse
-from ..utils import (
- ExtractorError,
- extract_attributes,
-)
+import functools
+from .dplay import DiscoveryPlusBaseIE
+from ..utils import join_nonempty
+from ..utils.traversal import traverse_obj
-class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
- _WORKING = False
- _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _GEO_COUNTRIES = ['DE']
+
+class Tele5IE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?'
_TESTS = [{
- 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+ # slug_a and slug_b
+ 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane',
'info_dict': {
- 'id': '1549416',
+ 'id': '6852024',
'ext': 'mp4',
- 'upload_date': '20180814',
- 'timestamp': 1534290623,
- 'title': 'Pandorum',
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'Quarantäne',
+ 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675',
+ 'episode': 'Episode 73',
+ 'episode_number': 73,
+ 'season': 'Season 4',
+ 'season_number': 4,
+ 'series': 'Stargate Atlantis',
+ 'upload_date': '20240525',
+ 'timestamp': 1716643200,
+ 'duration': 2503.2,
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg',
+ 'creators': ['Tele5'],
+ 'tags': [],
},
- 'skip': 'No longer available: "404 Seite nicht gefunden"',
}, {
- # jwplatform, nexx unavailable
- 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+ # only slug_a
+ 'url': 'https://tele5.de/mediathek/inside-out',
'info_dict': {
- 'id': 'WJuiOlUp',
+ 'id': '6819502',
'ext': 'mp4',
- 'upload_date': '20200603',
- 'timestamp': 1591214400,
- 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
- 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+ 'title': 'Inside out',
+ 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd',
+ 'series': 'Inside out',
+ 'upload_date': '20240523',
+ 'timestamp': 1716494400,
+ 'duration': 5343.4,
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg',
+ 'creators': ['Tele5'],
+ 'tags': [],
},
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'No longer available, redirects to Filme page',
}, {
- 'url': 'https://tele5.de/mediathek/angel-of-mine/',
+ # playlist
+ 'url': 'https://tele5.de/mediathek/schlefaz',
'info_dict': {
- 'id': '1252360',
- 'ext': 'mp4',
- 'upload_date': '20220109',
- 'timestamp': 1641762000,
- 'title': 'Angel of Mine',
- 'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
+ 'id': 'mediathek-schlefaz',
},
- 'params': {
- 'format': 'bestvideo',
- },
- }, {
- 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/anders-ist-sevda/',
- 'only_matching': True,
+ 'playlist_mincount': 3,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
- player_info = extract_attributes(player_element)
- asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
- endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
- source_type = player_info.get('sourcetype')
- if source_type:
- endpoint = '%s-%s' % (source_type, endpoint)
- try:
- return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
- except ExtractorError as e:
- if getattr(e, 'message', '') == 'Missing deviceId in context':
- self.report_drm(video_id)
- raise
+ parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b')
+ playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-')
+
+ query = {'environment': 'tele5', 'v': '2'}
+ if not slug_b:
+ endpoint = f'page/{slug_a}'
+ query['parent_slug'] = parent_slug
+ else:
+ endpoint = f'videos/{slug_b}'
+ query['filter[show.slug]'] = slug_a
+ cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query)
+
+ return self.playlist_result(map(
+ functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'),
+ traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py
index 9378ed0..380c84d 100644
--- a/yt_dlp/extractor/telewebion.py
+++ b/yt_dlp/extractor/telewebion.py
@@ -1,8 +1,8 @@
from __future__ import annotations
+import functools
import json
-from functools import partial
-from textwrap import dedent
+import textwrap
from .common import InfoExtractor
from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601
@@ -10,7 +10,7 @@ from ..utils.traversal import traverse_obj
def _fmt_url(url):
- return partial(format_field, template=url, default=None)
+ return functools.partial(format_field, template=url, default=None)
class TelewebionIE(InfoExtractor):
@@ -88,7 +88,7 @@ class TelewebionIE(InfoExtractor):
if not video_id.startswith('0x'):
video_id = hex(int(video_id))
- episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent('''
+ episode_data = self._call_graphql_api('getEpisodeDetail', video_id, textwrap.dedent('''
queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) {
title
program {
@@ -127,7 +127,7 @@ class TelewebionIE(InfoExtractor):
'formats': (
'channel', 'descriptor', {str},
{_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
- {partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
+ {functools.partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
}))
info_dict['id'] = video_id
return info_dict
diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py
index 9318d6f..71e54eb 100644
--- a/yt_dlp/extractor/tempo.py
+++ b/yt_dlp/extractor/tempo.py
@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
traverse_obj,
- try_call
+ try_call,
)
diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py
index 6618ea4..ae2cb48 100644
--- a/yt_dlp/extractor/tencent.py
+++ b/yt_dlp/extractor/tencent.py
@@ -8,8 +8,8 @@ from .common import InfoExtractor
from ..aes import aes_cbc_encrypt_bytes
from ..utils import (
ExtractorError,
- float_or_none,
determine_ext,
+ float_or_none,
int_or_none,
js_to_json,
traverse_obj,
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index a98275d..11cc570 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -1,7 +1,7 @@
import base64
+import datetime as dt
import functools
import itertools
-from datetime import datetime
from .common import InfoExtractor
from ..networking import HEADRequest
@@ -70,7 +70,7 @@ class TenPlayIE(InfoExtractor):
username, password = self._get_login_info()
if username is None or password is None:
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
- _timestamp = datetime.now().strftime('%Y%m%d000000')
+ _timestamp = dt.datetime.now().strftime('%Y%m%d000000')
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
'X-Network-Ten-Auth': _auth_header,
diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py
index a231ecc..fb64077 100644
--- a/yt_dlp/extractor/theguardian.py
+++ b/yt_dlp/extractor/theguardian.py
@@ -10,7 +10,7 @@ from ..utils import (
parse_qs,
traverse_obj,
unified_strdate,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py
index a991a4d..99f0d42 100644
--- a/yt_dlp/extractor/theintercept.py
+++ b/yt_dlp/extractor/theintercept.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- parse_iso8601,
- int_or_none,
ExtractorError,
+ int_or_none,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py
index 9160f5e..eeb33a6 100644
--- a/yt_dlp/extractor/theplatform.py
+++ b/yt_dlp/extractor/theplatform.py
@@ -1,29 +1,27 @@
-import re
-import time
-import hmac
import binascii
import hashlib
+import hmac
+import re
+import time
-
-from .once import OnceIE
from .adobepass import AdobePassIE
-from ..networking import Request
+from .once import OnceIE
+from ..networking import HEADRequest, Request
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
+ find_xpath_attr,
float_or_none,
int_or_none,
- parse_qs,
- unsmuggle_url,
- update_url_query,
- xpath_with_ns,
mimetype2ext,
- find_xpath_attr,
+ parse_qs,
traverse_obj,
+ unsmuggle_url,
update_url,
+ update_url_query,
urlhandle_detect_ext,
+ xpath_with_ns,
)
-from ..networking import HEADRequest
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
diff --git a/yt_dlp/extractor/thisoldhouse.py b/yt_dlp/extractor/thisoldhouse.py
index 15f8380..fbc12d5 100644
--- a/yt_dlp/extractor/thisoldhouse.py
+++ b/yt_dlp/extractor/thisoldhouse.py
@@ -1,5 +1,6 @@
import json
+from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from .zype import ZypeIE
from ..networking import HEADRequest
@@ -8,6 +9,7 @@ from ..utils import (
ExtractorError,
filter_dict,
parse_qs,
+ smuggle_url,
try_call,
urlencode_postdata,
)
@@ -17,24 +19,44 @@ class ThisOldHouseIE(InfoExtractor):
_NETRC_MACHINE = 'thisoldhouse'
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
_TESTS = [{
+ # Unresolved Brightcove URL embed (formerly Zype), free
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
'info_dict': {
- 'id': '5dcdddf673c3f956ef5db202',
+ 'id': '6325298523112',
'ext': 'mp4',
'title': 'How to Build a Storage Bench',
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
- 'timestamp': 1442548800,
- 'upload_date': '20150918',
- 'duration': 674,
- 'view_count': int,
- 'average_rating': 0,
- 'thumbnail': r're:^https?://.*\.jpg\?\d+$',
- 'display_id': 'how-to-build-a-storage-bench',
+ 'timestamp': 1681793639,
+ 'upload_date': '20230418',
+ 'duration': 674.54,
+ 'tags': 'count:11',
+ 'uploader_id': '6314471934001',
+ 'thumbnail': r're:^https?://.*\.jpg',
},
'params': {
'skip_download': True,
},
}, {
+ # Brightcove embed, authwalled
+ 'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
+ 'info_dict': {
+ 'id': '6349675446112',
+ 'ext': 'mp4',
+ 'title': 'E17 | Glen Ridge Generational | Multi-Generational',
+ 'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
+ 'timestamp': 1711382202,
+ 'upload_date': '20240325',
+ 'duration': 1422.229,
+ 'tags': 'count:13',
+ 'uploader_id': '6314471934001',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ 'expected_warnings': ['Login with password is not supported for this website'],
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires subscription',
+ }, {
# Page no longer has video
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True,
@@ -98,7 +120,15 @@ class ThisOldHouseIE(InfoExtractor):
video_url, video_id = self._search_regex(
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
- webpage, 'video url', group=(1, 2))
- video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
+ webpage, 'zype url', group=(1, 2), default=(None, None))
+ if video_url:
+ video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
+ return self.url_result(video_url, ZypeIE, video_id)
- return self.url_result(video_url, ZypeIE, video_id)
+ video_url, video_id = self._search_regex([
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)thisoldhouse\.com/videos/brightcove/(\d+))'],
+ webpage, 'iframe url', group=(1, 2))
+ if not parse_qs(video_url).get('videoId'):
+ video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
+ return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)
diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py
index 9d3368e..04b0838 100644
--- a/yt_dlp/extractor/thisvid.py
+++ b/yt_dlp/extractor/thisvid.py
@@ -134,7 +134,7 @@ class ThisVidPlaylistBaseIE(InfoExtractor):
title = re.split(
r'(?i)\s*\|\s*ThisVid\.com\s*$',
self._og_search_title(webpage, default=None)
- or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None
+ or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', maxsplit=1)[0] or None
return self.playlist_from_matches(
self._generate_playlist_entries(url, playlist_id, webpage),
diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py
index 7841f8d..f7a13d2 100644
--- a/yt_dlp/extractor/threeqsdn.py
+++ b/yt_dlp/extractor/threeqsdn.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
int_or_none,
join_nonempty,
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index aa83567..7bcfded 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1,23 +1,27 @@
+import functools
import itertools
import json
import random
import re
import string
import time
+import uuid
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
- LazyList,
UnsupportedError,
UserNotLive,
determine_ext,
+ filter_dict,
format_field,
int_or_none,
join_nonempty,
merge_dicts,
+ mimetype2ext,
+ parse_qs,
qualities,
remove_start,
srt_subtitles_timecode,
@@ -30,19 +34,71 @@ from ..utils import (
class TikTokBaseIE(InfoExtractor):
- _APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
- _WORKING_APP_VERSION = None
- _APP_NAME = 'trill'
- _AID = 1180
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
- @property
+ _APP_INFO_DEFAULTS = {
+ # unique "install id"
+ 'iid': None,
+ # TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
+ 'app_name': 'musical_ly',
+ 'app_version': '34.1.2',
+ 'manifest_app_version': '2023401020',
+ # "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
+ 'aid': '0',
+ }
+ _APP_INFO_POOL = None
+ _APP_INFO = None
+ _APP_USER_AGENT = None
+
+ @functools.cached_property
+ def _KNOWN_APP_INFO(self):
+ # If we have a genuine device ID, we may not need any IID
+ default = [''] if self._KNOWN_DEVICE_ID else []
+ return self._configuration_arg('app_info', default, ie_key=TikTokIE)
+
+ @functools.cached_property
+ def _KNOWN_DEVICE_ID(self):
+ return self._configuration_arg('device_id', [None], ie_key=TikTokIE)[0]
+
+ @functools.cached_property
+ def _DEVICE_ID(self):
+ return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000))
+
+ @functools.cached_property
def _API_HOSTNAME(self):
return self._configuration_arg(
'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]
+ def _get_next_app_info(self):
+ if self._APP_INFO_POOL is None:
+ defaults = {
+ key: self._configuration_arg(key, [default], ie_key=TikTokIE)[0]
+ for key, default in self._APP_INFO_DEFAULTS.items()
+ if key != 'iid'
+ }
+ self._APP_INFO_POOL = [
+ {**defaults, **dict(
+ (k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
+ )} for app_info in self._KNOWN_APP_INFO
+ ]
+
+ if not self._APP_INFO_POOL:
+ return False
+
+ self._APP_INFO = self._APP_INFO_POOL.pop(0)
+
+ app_name = self._APP_INFO['app_name']
+ version = self._APP_INFO['manifest_app_version']
+ if app_name == 'musical_ly':
+ package = f'com.zhiliaoapp.musically/{version}'
+ else: # trill, aweme
+ package = f'com.ss.android.ugc.{app_name}/{version}'
+ self._APP_USER_AGENT = f'{package} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)'
+
+ return True
+
@staticmethod
def _create_url(user_id, video_id):
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
@@ -58,7 +114,7 @@ class TikTokBaseIE(InfoExtractor):
'universal data', display_id, end_pattern=r'</script>', default={}),
('__DEFAULT_SCOPE__', {dict})) or {}
- def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
+ def _call_api_impl(self, ep, query, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
@@ -67,80 +123,85 @@ class TikTokBaseIE(InfoExtractor):
return self._download_json(
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
fatal=fatal, note=note, errnote=errnote, headers={
- 'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
+ 'User-Agent': self._APP_USER_AGENT,
'Accept': 'application/json',
}, query=query)
- def _build_api_query(self, query, app_version, manifest_app_version):
- return {
+ def _build_api_query(self, query):
+ return filter_dict({
**query,
- 'version_name': app_version,
- 'version_code': manifest_app_version,
- 'build_number': app_version,
- 'manifest_version_code': manifest_app_version,
- 'update_version_code': manifest_app_version,
- 'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
- 'uuid': ''.join(random.choices(string.digits, k=16)),
- '_rticket': int(time.time() * 1000),
- 'ts': int(time.time()),
- 'device_brand': 'Google',
- 'device_type': 'Pixel 7',
'device_platform': 'android',
+ 'os': 'android',
+ 'ssmix': 'a',
+ '_rticket': int(time.time() * 1000),
+ 'cdid': str(uuid.uuid4()),
+ 'channel': 'googleplay',
+ 'aid': self._APP_INFO['aid'],
+ 'app_name': self._APP_INFO['app_name'],
+ 'version_code': ''.join((f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.'))),
+ 'version_name': self._APP_INFO['app_version'],
+ 'manifest_version_code': self._APP_INFO['manifest_app_version'],
+ 'update_version_code': self._APP_INFO['manifest_app_version'],
+ 'ab_version': self._APP_INFO['app_version'],
'resolution': '1080*2400',
'dpi': 420,
- 'os_version': '13',
+ 'device_type': 'Pixel 7',
+ 'device_brand': 'Google',
+ 'language': 'en',
'os_api': '29',
- 'carrier_region': 'US',
+ 'os_version': '13',
+ 'ac': 'wifi',
+ 'is_pad': '0',
+ 'current_region': 'US',
+ 'app_type': 'normal',
'sys_region': 'US',
- 'region': 'US',
- 'app_name': self._APP_NAME,
- 'app_language': 'en',
- 'language': 'en',
+ 'last_install_time': int(time.time()) - random.randint(86400, 1123200),
'timezone_name': 'America/New_York',
+ 'residence': 'US',
+ 'app_language': 'en',
'timezone_offset': '-14400',
- 'channel': 'googleplay',
- 'ac': 'wifi',
- 'mcc_mnc': '310260',
- 'is_my_cn': 0,
- 'aid': self._AID,
- 'ssmix': 'a',
- 'as': 'a1qwert123',
- 'cp': 'cbfhckdckkde1',
- }
+ 'host_abi': 'armeabi-v7a',
+ 'locale': 'en',
+ 'ac2': 'wifi5g',
+ 'uoo': '1',
+ 'carrier_region': 'US',
+ 'op_region': 'US',
+ 'build_number': self._APP_INFO['app_version'],
+ 'region': 'US',
+ 'ts': int(time.time()),
+ 'iid': self._APP_INFO.get('iid'),
+ 'device_id': self._DEVICE_ID,
+ 'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
+ })
def _call_api(self, ep, query, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
- if not self._WORKING_APP_VERSION:
- app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0]
- manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0]
- if app_version and manifest_app_version:
- self._WORKING_APP_VERSION = (app_version, manifest_app_version)
- self.write_debug('Imported app version combo from extractor arguments')
- elif app_version or manifest_app_version:
- self.report_warning('Only one of the two required version params are passed as extractor arguments', only_once=True)
-
- if self._WORKING_APP_VERSION:
- app_version, manifest_app_version = self._WORKING_APP_VERSION
- real_query = self._build_api_query(query, app_version, manifest_app_version)
- return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
-
- for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
- real_query = self._build_api_query(query, app_version, manifest_app_version)
+ if not self._APP_INFO and not self._get_next_app_info():
+ message = 'No working app info is available'
+ if fatal:
+ raise ExtractorError(message, expected=True)
+ else:
+ self.report_warning(message)
+ return
+
+ max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
+ for count in itertools.count(1):
+ self.write_debug(str(self._APP_INFO))
+ real_query = self._build_api_query(query)
try:
- res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
- self._WORKING_APP_VERSION = (app_version, manifest_app_version)
- return res
+ return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
- if count == len(self._APP_VERSIONS):
+ message = str(e.cause or e.msg)
+ if not self._get_next_app_info():
if fatal:
- raise e
+ raise
else:
- self.report_warning(str(e.cause or e.msg))
+ self.report_warning(message)
return
- self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS)))
+ self.report_warning(f'{message}. Retrying... (attempt {count} of {max_tries})')
continue
- raise e
+ raise
def _extract_aweme_app(self, aweme_id):
feed_list = self._call_api(
@@ -151,7 +212,31 @@ class TikTokBaseIE(InfoExtractor):
raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
return self._parse_aweme_video_app(aweme_detail)
- def _get_subtitles(self, aweme_detail, aweme_id):
+ def _extract_web_data_and_status(self, url, video_id, fatal=True):
+ webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or ''
+ video_data, status = {}, None
+
+ if universal_data := self._get_universal_data(webpage, video_id):
+ self.write_debug('Found universal data for rehydration')
+ status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
+ video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
+
+ elif sigi_data := self._get_sigi_state(webpage, video_id):
+ self.write_debug('Found sigi state data')
+ status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
+ video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
+
+ elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
+ self.write_debug('Found next.js data')
+ status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
+ video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
+
+ elif fatal:
+ raise ExtractorError('Unable to extract webpage video data')
+
+ return video_data, status
+
+ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info
subtitles = {}
# aweme/detail endpoint subs
@@ -182,32 +267,32 @@ class TikTokBaseIE(InfoExtractor):
})
# webpage subs
if not subtitles:
- for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict):
- if not caption.get('Url'):
- continue
+ if user_name: # only _parse_aweme_video_app needs to extract the webpage here
+ aweme_detail, _ = self._extract_web_data_and_status(
+ self._create_url(user_name, aweme_id), aweme_id, fatal=False)
+ for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'],
})
return subtitles
+ def _parse_url_key(self, url_key):
+ format_id, codec, res, bitrate = self._search_regex(
+ r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
+ 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
+ if not format_id:
+ return {}, None
+ return {
+ 'format_id': format_id,
+ 'vcodec': 'h265' if codec == 'bytevc1' else codec,
+ 'tbr': int_or_none(bitrate, scale=1000) or None,
+ 'quality': qualities(self.QUALITIES)(res),
+ }, res
+
def _parse_aweme_video_app(self, aweme_detail):
aweme_id = aweme_detail['aweme_id']
video_info = aweme_detail['video']
-
- def parse_url_key(url_key):
- format_id, codec, res, bitrate = self._search_regex(
- r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
- 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
- if not format_id:
- return {}, None
- return {
- 'format_id': format_id,
- 'vcodec': 'h265' if codec == 'bytevc1' else codec,
- 'tbr': int_or_none(bitrate, scale=1000) or None,
- 'quality': qualities(self.QUALITIES)(res),
- }, res
-
known_resolutions = {}
def audio_meta(url):
@@ -222,7 +307,8 @@ class TikTokBaseIE(InfoExtractor):
} if ext == 'mp3' or '-music-' in url else {}
def extract_addr(addr, add_meta={}):
- parsed_meta, res = parse_url_key(addr.get('url_key', ''))
+ parsed_meta, res = self._parse_url_key(addr.get('url_key', ''))
+ is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2'
if res:
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
@@ -235,8 +321,11 @@ class TikTokBaseIE(InfoExtractor):
'acodec': 'aac',
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
**add_meta, **parsed_meta,
+ # bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable
+ 'preference': -100 if is_bytevc2 else -1,
'format_note': join_nonempty(
- add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
+ add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None,
+ '(UNPLAYABLE)' if is_bytevc2 else None, delim=' '),
**audio_meta(url),
} for url in addr.get('url_list') or []]
@@ -244,6 +333,7 @@ class TikTokBaseIE(InfoExtractor):
formats = []
width = int_or_none(video_info.get('width'))
height = int_or_none(video_info.get('height'))
+ ratio = try_call(lambda: width / height) or 0.5625
if video_info.get('play_addr'):
formats.extend(extract_addr(video_info['play_addr'], {
'format_id': 'play_addr',
@@ -260,8 +350,8 @@ class TikTokBaseIE(InfoExtractor):
'format_id': 'download_addr',
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
'vcodec': 'h264',
- 'width': dl_width or width,
- 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
+ 'width': dl_width,
+ 'height': try_call(lambda: int(dl_width / ratio)), # download_addr['height'] is wrong
'preference': -2 if video_info.get('has_watermark') else -1,
}))
if video_info.get('play_addr_h264'):
@@ -304,11 +394,7 @@ class TikTokBaseIE(InfoExtractor):
})
stats_info = aweme_detail.get('statistics') or {}
- author_info = aweme_detail.get('author') or {}
music_info = aweme_detail.get('music') or {}
- user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
- 'sec_uid', 'id', 'uid', 'unique_id',
- expected_type=str_or_none, get_all=False))
labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str)
contained_music_track = traverse_obj(
@@ -322,6 +408,13 @@ class TikTokBaseIE(InfoExtractor):
else:
music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str}))
+ author_info = traverse_obj(aweme_detail, ('author', {
+ 'uploader': ('unique_id', {str}),
+ 'uploader_id': ('uid', {str_or_none}),
+ 'channel': ('nickname', {str}),
+ 'channel_id': ('sec_uid', {str}),
+ }))
+
return {
'id': aweme_id,
**traverse_obj(aweme_detail, {
@@ -335,21 +428,20 @@ class TikTokBaseIE(InfoExtractor):
'repost_count': 'share_count',
'comment_count': 'comment_count',
}, expected_type=int_or_none),
- **traverse_obj(author_info, {
- 'uploader': ('unique_id', {str}),
- 'uploader_id': ('uid', {str_or_none}),
- 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
- 'channel': ('nickname', {str}),
- 'channel_id': ('sec_uid', {str}),
- }),
- 'uploader_url': user_url,
+ **author_info,
+ 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None),
+ 'uploader_url': format_field(
+ author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None),
'track': music_track,
'album': str_or_none(music_info.get('album')) or None,
'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
'formats': formats,
- 'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
+ 'subtitles': self.extract_subtitles(
+ aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')),
'thumbnails': thumbnails,
- 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000),
+ 'duration': (traverse_obj(video_info, (
+ (None, 'download_addr'), 'duration', {functools.partial(int_or_none, scale=1000)}, any))
+ or traverse_obj(music_info, ('duration', {int_or_none}))),
'availability': self._availability(
is_private='Private' in labels,
needs_subscription='Friends only' in labels,
@@ -357,78 +449,136 @@ class TikTokBaseIE(InfoExtractor):
'_format_sort_fields': ('quality', 'codec', 'size', 'br'),
}
- def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
- video_info = aweme_detail['video']
- author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={})
- music_info = aweme_detail.get('music') or {}
- stats_info = aweme_detail.get('stats') or {}
- channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False)
- user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None
-
+ def _extract_web_formats(self, aweme_detail):
+ COMMON_FORMAT_INFO = {
+ 'ext': 'mp4',
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ }
+ video_info = traverse_obj(aweme_detail, ('video', {dict})) or {}
+ play_width = int_or_none(video_info.get('width'))
+ play_height = int_or_none(video_info.get('height'))
+ ratio = try_call(lambda: play_width / play_height) or 0.5625
formats = []
- width = int_or_none(video_info.get('width'))
- height = int_or_none(video_info.get('height'))
+
+ for bitrate_info in traverse_obj(video_info, ('bitrateInfo', lambda _, v: v['PlayAddr']['UrlList'])):
+ format_info, res = self._parse_url_key(
+ traverse_obj(bitrate_info, ('PlayAddr', 'UrlKey', {str})) or '')
+ # bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable
+ is_bytevc2 = format_info.get('vcodec') == 'bytevc2'
+ format_info.update({
+ 'format_note': 'UNPLAYABLE' if is_bytevc2 else None,
+ 'preference': -100 if is_bytevc2 else -1,
+ 'filesize': traverse_obj(bitrate_info, ('PlayAddr', 'DataSize', {int_or_none})),
+ })
+
+ if dimension := (res and int(res[:-1])):
+ if dimension == 540: # '540p' is actually 576p
+ dimension = 576
+ if ratio < 1: # portrait: res/dimension is width
+ y = int(dimension / ratio)
+ format_info.update({
+ 'width': dimension,
+ 'height': y - (y % 2),
+ })
+ else: # landscape: res/dimension is height
+ x = int(dimension * ratio)
+ format_info.update({
+ 'width': x + (x % 2),
+ 'height': dimension,
+ })
+
+ for video_url in traverse_obj(bitrate_info, ('PlayAddr', 'UrlList', ..., {url_or_none})):
+ formats.append({
+ **COMMON_FORMAT_INFO,
+ **format_info,
+ 'url': self._proto_relative_url(video_url),
+ })
+
+ # We don't have res string for play formats, but need quality for sorting & de-duplication
+ play_quality = traverse_obj(formats, (lambda _, v: v['width'] == play_width, 'quality', any))
for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
formats.append({
+ **COMMON_FORMAT_INFO,
+ 'format_id': 'play',
'url': self._proto_relative_url(play_url),
- 'ext': 'mp4',
- 'width': width,
- 'height': height,
+ 'width': play_width,
+ 'height': play_height,
+ 'quality': play_quality,
})
for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
formats.append({
+ **COMMON_FORMAT_INFO,
'format_id': 'download',
'url': self._proto_relative_url(download_url),
- 'ext': 'mp4',
- 'width': width,
- 'height': height,
})
self._remove_duplicate_formats(formats)
- thumbnails = []
- for thumb_url in traverse_obj(aweme_detail, (
- (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
- thumbnails.append({
- 'url': self._proto_relative_url(thumb_url),
- 'width': width,
- 'height': height,
+ for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
+ f.update({
+ 'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
+ 'preference': f.get('preference') or -2,
})
+ # Is it a slideshow with only audio for download?
+ if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
+ audio_url = aweme_detail['music']['playUrl']
+ ext = traverse_obj(parse_qs(audio_url), (
+ 'mime_type', -1, {lambda x: x.replace('_', '/')}, {mimetype2ext})) or 'm4a'
+ formats.append({
+ 'format_id': 'audio',
+ 'url': self._proto_relative_url(audio_url),
+ 'ext': ext,
+ 'acodec': 'aac' if ext == 'm4a' else ext,
+ 'vcodec': 'none',
+ })
+
+ return formats
+
+ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
+ author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {
+ 'channel': ('nickname', {str}),
+ 'channel_id': (('authorSecId', 'secUid'), {str}),
+ 'uploader': (('uniqueId', 'author'), {str}),
+ 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
+ }), get_all=False)
+
return {
'id': video_id,
+ 'formats': None if extract_flat else self._extract_web_formats(aweme_detail),
+ 'subtitles': None if extract_flat else self.extract_subtitles(aweme_detail, video_id, None),
+ 'http_headers': {'Referer': webpage_url},
+ **author_info,
+ 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None),
+ 'uploader_url': format_field(
+ author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None),
+ **traverse_obj(aweme_detail, ('music', {
+ 'track': ('title', {str}),
+ 'album': ('album', {str}, {lambda x: x or None}),
+ 'artists': ('authorName', {str}, {lambda x: re.split(r'(?:, | & )', x) if x else None}),
+ 'duration': ('duration', {int_or_none}),
+ })),
**traverse_obj(aweme_detail, {
'title': ('desc', {str}),
'description': ('desc', {str}),
- 'duration': ('video', 'duration', {int_or_none}),
+ # audio-only slideshows have a video duration of 0 and an actual audio duration
+ 'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}),
'timestamp': ('createTime', {int_or_none}),
}),
- **traverse_obj(author_info or aweme_detail, {
- 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
- 'channel': ('nickname', {str}),
- 'uploader': (('uniqueId', 'author'), {str}),
- 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
- }, get_all=False),
- **traverse_obj(stats_info, {
+ **traverse_obj(aweme_detail, ('stats', {
'view_count': 'playCount',
'like_count': 'diggCount',
'repost_count': 'shareCount',
'comment_count': 'commentCount',
- }, expected_type=int_or_none),
- **traverse_obj(music_info, {
- 'track': ('title', {str}),
- 'album': ('album', {str}, {lambda x: x or None}),
- 'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
- }),
- 'channel_id': channel_id,
- 'uploader_url': user_url,
- 'formats': formats,
- 'thumbnails': thumbnails,
- 'http_headers': {
- 'Referer': webpage_url,
- }
+ }), expected_type=int_or_none),
+ 'thumbnails': traverse_obj(aweme_detail, (
+ (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {
+ 'url': ({url_or_none}, {self._proto_relative_url}),
+ },
+ )),
}
@@ -465,21 +615,21 @@ class TikTokIE(TikTokBaseIE):
'skip': '404 Not Found',
}, {
'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
- 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
+ 'md5': 'f21112672ee4ce05ca390fb6522e1b6f',
'info_dict': {
'id': '6742501081818877190',
'ext': 'mp4',
'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
'duration': 27,
- 'height': 960,
- 'width': 540,
+ 'height': 1024,
+ 'width': 576,
'uploader': 'patrox',
'uploader_id': '18702747',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
+ 'uploader_url': 'https://www.tiktok.com/@patrox',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
'channel': 'patroX',
- 'creators': ['patroX'],
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'upload_date': '20190930',
'timestamp': 1569860870,
@@ -491,7 +641,7 @@ class TikTokIE(TikTokBaseIE):
'track': 'Big Fun',
},
}, {
- # Banned audio, only available on the app
+ # Banned audio, was available on the app, now works with web too
'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
'info_dict': {
'id': '6984138651336838402',
@@ -500,9 +650,9 @@ class TikTokIE(TikTokBaseIE):
'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
'uploader': 'barudakhb_',
'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
- 'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
'uploader_id': '6974687867511718913',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
+ 'uploader_url': 'https://www.tiktok.com/@barudakhb_',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'track': 'Boka Dance',
'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
@@ -525,7 +675,6 @@ class TikTokIE(TikTokBaseIE):
'description': 'Slap and Run!',
'uploader': 'user440922249',
'channel': 'Slap And Run',
- 'creators': ['Slap And Run'],
'uploader_id': '7036055384943690754',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
@@ -539,7 +688,7 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'params': {'skip_download': True}, # XXX: unable to download video data: HTTP Error 403: Forbidden
+ 'skip': 'This video is unavailable',
}, {
# Video without title and description
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
@@ -550,9 +699,9 @@ class TikTokIE(TikTokBaseIE):
'description': '',
'uploader': 'pokemonlife22',
'channel': 'Pokemon',
- 'creators': ['Pokemon'],
'uploader_id': '6820838815978423302',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
+ 'uploader_url': 'https://www.tiktok.com/@pokemonlife22',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'track': 'original sound',
'timestamp': 1643714123,
@@ -597,13 +746,14 @@ class TikTokIE(TikTokBaseIE):
'title': 'TikTok video #7139980461132074283',
'description': '',
'channel': 'Antaura',
- 'creators': ['Antaura'],
'uploader': '_le_cannibale_',
'uploader_id': '6604511138619654149',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
+ 'uploader_url': 'https://www.tiktok.com/@_le_cannibale_',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'artists': ['nathan !'],
'track': 'grahamscott canon',
+ 'duration': 10,
'upload_date': '20220905',
'timestamp': 1662406249,
'view_count': int,
@@ -614,18 +764,18 @@ class TikTokIE(TikTokBaseIE):
},
}, {
# only available via web
- 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME
- 'md5': '6aba7fad816e8709ff2c149679ace165',
+ 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662',
+ 'md5': '4cdefa501ac8ac20bf04986e10916fea',
'info_dict': {
'id': '7206382937372134662',
'ext': 'mp4',
'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
'channel': 'MoxyPatch',
- 'creators': ['MoxyPatch'],
'uploader': 'moxypatch',
'uploader_id': '7039142049363379205',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
+ 'uploader_url': 'https://www.tiktok.com/@moxypatch',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
'artists': ['your worst nightmare'],
'track': 'original sound',
@@ -654,7 +804,6 @@ class TikTokIE(TikTokBaseIE):
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel': 'tate mcrae',
- 'creators': ['tate mcrae'],
'artists': ['tate mcrae'],
'track': 'original sound',
'upload_date': '20220609',
@@ -666,7 +815,7 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int,
'thumbnail': r're:^https://.+\.webp',
},
- 'skip': 'Unavailable via feed API, no formats available via web',
+ 'skip': 'Unavailable via feed API, only audio available via web',
}, {
# Slideshow, audio-only m4a format
'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594',
@@ -678,13 +827,14 @@ class TikTokIE(TikTokBaseIE):
'description': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ',
'uploader': 'hara_yoimiya',
'uploader_id': '6582536342634676230',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
+ 'uploader_url': 'https://www.tiktok.com/@hara_yoimiya',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
'channel_id': 'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
- 'channel': 'лампочка',
- 'creators': ['лампочка'],
+ 'channel': 'лампочка(!)',
'artists': ['Øneheart'],
'album': 'watching the stars',
'track': 'watching the stars',
+ 'duration': 60,
'upload_date': '20230708',
'timestamp': 1688816612,
'view_count': int,
@@ -701,32 +851,16 @@ class TikTokIE(TikTokBaseIE):
def _real_extract(self, url):
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
- try:
- return self._extract_aweme_app(video_id)
- except ExtractorError as e:
- e.expected = True
- self.report_warning(f'{e}; trying with webpage')
-
- url = self._create_url(user_id, video_id)
- webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
-
- if universal_data := self._get_universal_data(webpage, video_id):
- self.write_debug('Found universal data for rehydration')
- status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
- video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
-
- elif sigi_data := self._get_sigi_state(webpage, video_id):
- self.write_debug('Found sigi state data')
- status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
- video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
- elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
- self.write_debug('Found next.js data')
- status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
- video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
+ if self._KNOWN_APP_INFO:
+ try:
+ return self._extract_aweme_app(video_id)
+ except ExtractorError as e:
+ e.expected = True
+ self.report_warning(f'{e}; trying with webpage')
- else:
- raise ExtractorError('Unable to extract webpage video data')
+ url = self._create_url(user_id, video_id)
+ video_data, status = self._extract_web_data_and_status(url, video_id)
if video_data and status == 0:
return self._parse_aweme_video_web(video_data, url, video_id)
@@ -737,102 +871,145 @@ class TikTokIE(TikTokBaseIE):
class TikTokUserIE(TikTokBaseIE):
IE_NAME = 'tiktok:user'
- _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
- _WORKING = False
+ _VALID_URL = r'(?:tiktokuser:|https?://(?:www\.)?tiktok\.com/@)(?P<id>[\w.-]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://tiktok.com/@corgibobaa?lang=en',
'playlist_mincount': 45,
'info_dict': {
- 'id': '6935371178089399301',
+ 'id': 'MS4wLjABAAAAepiJKgwWhulvCpSuUVsp7sgVVsFJbbNaLeQ6OQ0oAJERGDUIXhb2yxxHZedsItgT',
'title': 'corgibobaa',
- 'thumbnail': r're:https://.+_1080x1080\.webp'
},
- 'expected_warnings': ['Retrying']
}, {
'url': 'https://www.tiktok.com/@6820838815978423302',
'playlist_mincount': 5,
'info_dict': {
- 'id': '6820838815978423302',
+ 'id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'title': '6820838815978423302',
- 'thumbnail': r're:https://.+_1080x1080\.webp'
},
- 'expected_warnings': ['Retrying']
}, {
'url': 'https://www.tiktok.com/@meme',
'playlist_mincount': 593,
'info_dict': {
- 'id': '79005827461758976',
+ 'id': 'MS4wLjABAAAAiKfaDWeCsT3IHwY77zqWGtVRIy9v4ws1HbVi7auP1Vx7dJysU_hc5yRiGywojRD6',
'title': 'meme',
- 'thumbnail': r're:https://.+_1080x1080\.webp'
},
- 'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'tiktokuser:MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ',
+ 'playlist_mincount': 31,
+ 'info_dict': {
+ 'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ',
+ },
}]
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0'
+ _API_BASE_URL = 'https://www.tiktok.com/api/creator/item_list/'
- r''' # TODO: Fix by adding _signature to api_url
- def _entries(self, webpage, user_id, username):
- secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username)
- verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
- if not verifyfp_cookie:
- raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
- api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
- cursor = '0'
- for page in itertools.count():
- data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page)
- for video in data_json.get('itemList', []):
- video_id = video['id']
- video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
- yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc')))
- if not data_json.get('hasMore'):
- break
- cursor = data_json['cursor']
- '''
-
- def _video_entries_api(self, webpage, user_id, username):
- query = {
- 'user_id': user_id,
- 'count': 21,
- 'max_cursor': 0,
- 'min_cursor': 0,
- 'retry_type': 'no_retry',
- 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
+ def _build_web_query(self, sec_uid, cursor):
+ return {
+ 'aid': '1988',
+ 'app_language': 'en',
+ 'app_name': 'tiktok_web',
+ 'browser_language': 'en-US',
+ 'browser_name': 'Mozilla',
+ 'browser_online': 'true',
+ 'browser_platform': 'Win32',
+ 'browser_version': '5.0 (Windows)',
+ 'channel': 'tiktok_web',
+ 'cookie_enabled': 'true',
+ 'count': '15',
+ 'cursor': cursor,
+ 'device_id': self._DEVICE_ID,
+ 'device_platform': 'web_pc',
+ 'focus_state': 'true',
+ 'from_page': 'user',
+ 'history_len': '2',
+ 'is_fullscreen': 'false',
+ 'is_page_visible': 'true',
+ 'language': 'en',
+ 'os': 'windows',
+ 'priority_region': '',
+ 'referer': '',
+ 'region': 'US',
+ 'screen_height': '1080',
+ 'screen_width': '1920',
+ 'secUid': sec_uid,
+ 'type': '1', # pagination type: 0 == oldest-to-newest, 1 == newest-to-oldest
+ 'tz_name': 'UTC',
+ 'verifyFp': f'verify_{"".join(random.choices(string.hexdigits, k=7))}',
+ 'webcast_language': 'en',
}
+ def _entries(self, sec_uid, user_name):
+ display_id = user_name or sec_uid
+ seen_ids = set()
+
+ cursor = int(time.time() * 1E3)
for page in itertools.count(1):
- for retry in self.RetryManager():
- try:
- post_list = self._call_api(
- 'aweme/post', query, username, note=f'Downloading user video list page {page}',
- errnote='Unable to download user video list')
- except ExtractorError as e:
- if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
- retry.error = e
- continue
- raise
- yield from post_list.get('aweme_list', [])
- if not post_list.get('has_more'):
+ response = self._download_json(
+ self._API_BASE_URL, display_id, f'Downloading page {page}',
+ query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT})
+
+ for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
+ video_id = video['id']
+ if video_id in seen_ids:
+ continue
+ seen_ids.add(video_id)
+ webpage_url = self._create_url(display_id, video_id)
+ yield self.url_result(
+ webpage_url, TikTokIE,
+ **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True))
+
+ old_cursor = cursor
+ cursor = traverse_obj(
+ response, ('itemList', -1, 'createTime', {lambda x: int(x * 1E3)}))
+ if not cursor or old_cursor == cursor:
+ # User may not have posted within this ~1 week lookback, so manually adjust cursor
+ cursor = old_cursor - 7 * 86_400_000
+ # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed
+ if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'):
break
- query['max_cursor'] = post_list['max_cursor']
-
- def _entries_api(self, user_id, videos):
- for video in videos:
- yield {
- **self._parse_aweme_video_app(video),
- 'extractor_key': TikTokIE.ie_key(),
- 'extractor': 'TikTok',
- 'webpage_url': f'https://tiktok.com/@{user_id}/video/{video["aweme_id"]}',
- }
- def _real_extract(self, url):
- user_name = self._match_id(url)
- webpage = self._download_webpage(url, user_name, headers={
- 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
- })
- user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name
+ def _get_sec_uid(self, user_url, user_name, msg):
+ webpage = self._download_webpage(
+ user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'},
+ note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or ''
+ return (traverse_obj(self._get_universal_data(webpage, user_name),
+ ('webapp.user-detail', 'userInfo', 'user', 'secUid', {str}))
+ or traverse_obj(self._get_sigi_state(webpage, user_name),
+ ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}),
+ ('UserModule', 'users', ..., 'secUid', {str}, any)))
- videos = LazyList(self._video_entries_api(webpage, user_id, user_name))
- thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0))
+ def _real_extract(self, url):
+ user_name, sec_uid = self._match_id(url), None
+ if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name):
+ user_name, sec_uid = None, mobj.group(0)
+ else:
+ sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user')
+ or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live'))
+
+ if not sec_uid:
+ webpage = self._download_webpage(
+ f'https://www.tiktok.com/embed/@{user_name}', user_name,
+ note='Downloading user embed page', fatal=False) or ''
+ data = traverse_obj(self._search_json(
+ r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
+ webpage, 'data', user_name, default={}),
+ ('source', 'data', f'/embed/@{user_name}', {dict}))
+
+ for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
+ webpage_url = self._create_url(user_name, aweme_id)
+ video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
+ sec_uid = self._parse_aweme_video_web(
+ video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
+ if sec_uid:
+ break
+
+ if not sec_uid:
+ raise ExtractorError(
+ 'Unable to extract secondary user ID. If you are able to get the channel_id '
+ 'from a video posted by this user, try using "tiktokuser:channel_id" as the '
+ 'input URL (replacing `channel_id` with its actual value)', expected=True)
- return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail)
+ return self.playlist_result(self._entries(sec_uid, user_name), sec_uid, user_name)
class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
@@ -842,7 +1019,7 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul
'cursor': 0,
'count': 20,
'type': 5,
- 'device_id': ''.join(random.choices(string.digits, k=19))
+ 'device_id': self._DEVICE_ID,
}
for page in itertools.count(1):
@@ -944,6 +1121,64 @@ class TikTokTagIE(TikTokBaseListIE):
return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id)
+class TikTokCollectionIE(TikTokBaseIE):
+ IE_NAME = 'tiktok:collection'
+ _VALID_URL = r'https?://www\.tiktok\.com/@(?P<user_id>[\w.-]+)/collection/(?P<title>[^/?#]+)-(?P<id>\d+)/?(?:[?#]|$)'
+ _TESTS = [{
+ # playlist should have exactly 9 videos
+ 'url': 'https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462',
+ 'info_dict': {
+ 'id': '7371330159376370462',
+ 'title': 'imanoreotwe-count-test'
+ },
+ 'playlist_count': 9
+ }, {
+ # tests returning multiple pages of a large collection
+ 'url': 'https://www.tiktok.com/@imanoreotwe/collection/%F0%9F%98%82-7111887189571160875',
+ 'info_dict': {
+ 'id': '7111887189571160875',
+ 'title': 'imanoreotwe-%F0%9F%98%82'
+ },
+ 'playlist_mincount': 100
+ }]
+ _API_BASE_URL = 'https://www.tiktok.com/api/collection/item_list/'
+ _PAGE_COUNT = 30
+
+ def _build_web_query(self, collection_id, cursor):
+ return {
+ 'aid': '1988',
+ 'collectionId': collection_id,
+ 'count': self._PAGE_COUNT,
+ 'cursor': cursor,
+ 'sourceType': '113',
+ }
+
+ def _entries(self, collection_id):
+ cursor = 0
+ for page in itertools.count(1):
+ response = self._download_json(
+ self._API_BASE_URL, collection_id, f'Downloading page {page}',
+ query=self._build_web_query(collection_id, cursor))
+
+ for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
+ video_id = video['id']
+ author = traverse_obj(video, ('author', ('uniqueId', 'secUid', 'id'), {str}, any)) or '_'
+ webpage_url = self._create_url(author, video_id)
+ yield self.url_result(
+ webpage_url, TikTokIE,
+ **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True))
+
+ if not traverse_obj(response, 'hasMore'):
+ break
+ cursor += self._PAGE_COUNT
+
+ def _real_extract(self, url):
+ collection_id, title, user_name = self._match_valid_url(url).group('id', 'title', 'user_id')
+
+ return self.playlist_result(
+ self._entries(collection_id), collection_id, '-'.join((user_name, title)))
+
+
class DouyinIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
_TESTS = [{
@@ -959,7 +1194,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel': '杨超越',
- 'creators': ['杨超越'],
'duration': 19,
'timestamp': 1620905839,
'upload_date': '20210513',
@@ -984,7 +1218,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
'channel': '杨超越工作室',
- 'creators': ['杨超越工作室'],
'duration': 42,
'timestamp': 1625739481,
'upload_date': '20210708',
@@ -1009,7 +1242,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel': '杨超越',
- 'creators': ['杨超越'],
'duration': 17,
'timestamp': 1619098692,
'upload_date': '20210422',
@@ -1051,7 +1283,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel': '杨超越',
- 'creators': ['杨超越'],
'duration': 15,
'timestamp': 1621261163,
'upload_date': '20210517',
diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py
index aa7ee6c..ccb2ef8 100644
--- a/yt_dlp/extractor/toypics.py
+++ b/yt_dlp/extractor/toypics.py
@@ -1,6 +1,7 @@
-from .common import InfoExtractor
import re
+from .common import InfoExtractor
+
class ToypicsIE(InfoExtractor):
_WORKING = False
diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py
index 56e51fe..3bdeedd 100644
--- a/yt_dlp/extractor/triller.py
+++ b/yt_dlp/extractor/triller.py
@@ -14,8 +14,8 @@ from ..utils import (
traverse_obj,
unified_timestamp,
url_basename,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py
index 86f0990..efedac1 100644
--- a/yt_dlp/extractor/trueid.py
+++ b/yt_dlp/extractor/trueid.py
@@ -1,13 +1,13 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_age_limit,
traverse_obj,
unified_timestamp,
- url_or_none
+ url_or_none,
)
diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py
index a26bdca..f2d0c59 100644
--- a/yt_dlp/extractor/tumblr.py
+++ b/yt_dlp/extractor/tumblr.py
@@ -3,7 +3,7 @@ from ..utils import (
ExtractorError,
int_or_none,
traverse_obj,
- urlencode_postdata
+ urlencode_postdata,
)
diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py
index 630d84b..b27db87 100644
--- a/yt_dlp/extractor/turner.py
+++ b/yt_dlp/extractor/turner.py
@@ -3,17 +3,17 @@ import re
from .adobepass import AdobePassIE
from ..compat import compat_str
from ..utils import (
- fix_xml_ampersands,
- xpath_text,
- int_or_none,
+ ExtractorError,
determine_ext,
+ fix_xml_ampersands,
float_or_none,
+ int_or_none,
parse_duration,
- xpath_attr,
- update_url_query,
- ExtractorError,
strip_or_none,
+ update_url_query,
url_or_none,
+ xpath_attr,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py
index 7756aa3..9b19e79 100644
--- a/yt_dlp/extractor/tv2.py
+++ b/yt_dlp/extractor/tv2.py
@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
- int_or_none,
+ determine_ext,
float_or_none,
+ int_or_none,
js_to_json,
parse_iso8601,
remove_end,
diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py
index 9c0a111..cd35ff5 100644
--- a/yt_dlp/extractor/tv2hu.py
+++ b/yt_dlp/extractor/tv2hu.py
@@ -1,8 +1,8 @@
# encoding: utf-8
from .common import InfoExtractor
from ..utils import (
- traverse_obj,
UnsupportedError,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py
index a445fae..52ff230 100644
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@@ -2,85 +2,88 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
+ clean_html,
determine_ext,
extract_attributes,
+ get_element_by_class,
+ get_element_html_by_class,
int_or_none,
- parse_duration,
- traverse_obj,
- try_get,
url_or_none,
)
+from ..utils.traversal import traverse_obj
class TV5MondePlusIE(InfoExtractor):
- IE_DESC = 'TV5MONDE+'
- _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+ IE_NAME = 'TV5MONDE'
+ _VALID_URL = r'https?://(?:www\.)?tv5monde\.com/tv/video/(?P<id>[^/?#]+)'
_TESTS = [{
- # movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
- 'md5': 'c86f60bf8b75436455b1b205f9745955',
+ # documentary
+ 'url': 'https://www.tv5monde.com/tv/video/65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
+ 'md5': 'd2a708902d3df230a357c99701aece05',
'info_dict': {
- 'id': 'ZX0ipMyFQq_6D4BA7b',
- 'display_id': 'les-novices',
+ 'id': '3FPa7JMu21_6D4BA7b',
+ 'display_id': '65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
'ext': 'mp4',
- 'title': 'Les novices',
- 'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
- 'upload_date': '20230821',
- 'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
- 'duration': 5177,
- 'episode': 'Les novices',
+ 'title': "Baudouin, l'héritage d'un roi",
+ 'thumbnail': 'https://psi.tv5monde.com/upsilon-images/960x540/6f/baudouin-f49c6b0e.jpg',
+ 'duration': 4842,
+ 'upload_date': '20240130',
+ 'timestamp': 1706641242,
+ 'episode': "BAUDOUIN, L'HERITAGE D'UN ROI",
+ 'description': 'md5:78125c74a5cac06d7743a2d09126edad',
+ 'series': "Baudouin, l'héritage d'un roi",
},
}, {
# series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+ 'url': 'https://www.tv5monde.com/tv/video/52952-toute-la-vie-mardi-23-mars-2021',
+ 'md5': 'f5e09637cadd55639c05874e22eb56bf',
'info_dict': {
- 'id': 'wJ0eeEPozr_6D4BA7b',
- 'display_id': 'opj-les-dents-de-la-terre-2',
+ 'id': 'obRRZ8m6g9_6D4BA7b',
+ 'display_id': '52952-toute-la-vie-mardi-23-mars-2021',
'ext': 'mp4',
- 'title': "OPJ - Les dents de la Terre (2)",
- 'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
- 'upload_date': '20230823',
- 'series': 'OPJ',
- 'episode': 'Les dents de la Terre (2)',
- 'duration': 2877,
- 'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+ 'title': 'Toute la vie',
+ 'description': 'md5:a824a2e1dfd94cf45fa379a1fb43ce65',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5880553.jpg',
+ 'duration': 2526,
+ 'upload_date': '20230721',
+ 'timestamp': 1689971646,
+ 'series': 'Toute la vie',
+ 'episode': 'Mardi 23 mars 2021',
},
}, {
# movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
- 'md5': '32fa0cde16a4480d1251502a66856d5f',
+ 'url': 'https://www.tv5monde.com/tv/video/8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
+ 'md5': '87cefc34e10a6bf4f7823cccd7b36eb2',
'info_dict': {
- 'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
- 'display_id': 'ceux-qui-travaillent',
+ 'id': 'DOcfvdLKXL_6D4BA7b',
+ 'display_id': '8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
'ext': 'mp4',
- 'title': 'Ceux qui travaillent',
- 'description': 'md5:570e8bb688036ace873b2d50d24c026d',
- 'upload_date': '20210819',
+ 'title': 'Ce fleuve qui nous charrie',
+ 'description': 'md5:62ba3f875343c7fc4082bdfbbc1be992',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5476617.jpg',
+ 'duration': 5300,
+ 'upload_date': '20210822',
+ 'timestamp': 1629594105,
+ 'episode': 'CE FLEUVE QUI NOUS CHARRIE-P001-CE FLEUVE QUI NOUS CHARRIE',
+ 'series': 'Ce fleuve qui nous charrie',
},
- 'skip': 'no longer available',
}, {
- # series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
+ # news
+ 'url': 'https://www.tv5monde.com/tv/video/70402-tv5monde-le-journal-edition-du-08-05-24-11h',
+ 'md5': 'c62977d6d10754a2ecebba70ad370479',
'info_dict': {
- 'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
- 'display_id': 'vestiaires-caro-actrice',
+ 'id': 'LgQFrOCNsc_6D4BA7b',
+ 'display_id': '70402-tv5monde-le-journal-edition-du-08-05-24-11h',
'ext': 'mp4',
- 'title': "Vestiaires - Caro actrice",
- 'description': 'md5:db15d2e1976641e08377f942778058ea',
- 'upload_date': '20210819',
- 'series': "Vestiaires",
- 'episode': 'Caro actrice',
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'TV5MONDE, le journal',
+ 'description': 'md5:777dc209eaa4423b678477c36b0b04a8',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/6184105.jpg',
+ 'duration': 854,
+ 'upload_date': '20240508',
+ 'timestamp': 1715159640,
+ 'series': 'TV5MONDE, le journal',
+ 'episode': 'EDITION DU 08/05/24 - 11H',
},
- 'skip': 'no longer available',
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
- 'only_matching': True,
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
- 'only_matching': True,
}]
_GEO_BYPASS = False
@@ -98,7 +101,6 @@ class TV5MondePlusIE(InfoExtractor):
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
- title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
vpl_data = extract_attributes(self._search_regex(
r'(<[^>]+class="video_player_loader"[^>]+>)',
webpage, 'video player loader'))
@@ -147,26 +149,7 @@ class TV5MondePlusIE(InfoExtractor):
process_video_files(video_files)
metadata = self._parse_json(
- vpl_data['data-metadata'], display_id)
- duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
- or parse_duration(self._html_search_meta('duration', webpage)))
-
- description = self._html_search_regex(
- r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
- 'description', fatal=False)
-
- series = self._html_search_regex(
- r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
- 'series', default=None)
-
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- upload_date = self._search_regex(
- r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
- webpage, 'upload date', default=None)
- if upload_date:
- upload_date = upload_date.replace('_', '')
+ vpl_data.get('data-metadata') or '{}', display_id, fatal=False)
if not video_id:
video_id = self._search_regex(
@@ -175,16 +158,20 @@ class TV5MondePlusIE(InfoExtractor):
default=display_id)
return {
+ **traverse_obj(metadata, ('content', {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'episode': ('title', {str}),
+ 'series': ('series', {str}),
+ 'timestamp': ('publishDate_ts', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ })),
'id': video_id,
'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': vpl_data.get('data-image'),
- 'duration': duration,
- 'upload_date': upload_date,
+ 'title': clean_html(get_element_by_class('main-title', webpage)),
+ 'description': clean_html(get_element_by_class('text', get_element_html_by_class('ep-summary', webpage) or '')),
+ 'thumbnail': url_or_none(vpl_data.get('data-image')),
'formats': formats,
'subtitles': self._extract_subtitles(self._parse_json(
traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
- 'series': series,
- 'episode': episode,
}
diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py
index 9afe233..e3e1055 100644
--- a/yt_dlp/extractor/tva.py
+++ b/yt_dlp/extractor/tva.py
@@ -1,10 +1,9 @@
+import functools
+import re
+
from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- int_or_none,
- smuggle_url,
- strip_or_none,
-)
+from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
+from ..utils.traversal import traverse_obj
class TVAIE(InfoExtractor):
@@ -49,11 +48,20 @@ class QubIE(InfoExtractor):
'info_dict': {
'id': '6084352463001',
'ext': 'mp4',
- 'title': 'Épisode 01',
+ 'title': 'Ép 01. Mon dernier jour',
'uploader_id': '5481942443001',
'upload_date': '20190907',
'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'episode': 'Ép 01. Mon dernier jour',
+ 'episode_number': 1,
+ 'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
+ 'duration': 2625.963,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Alerte Amber',
+ 'channel': 'TVA',
},
}, {
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
@@ -64,22 +72,24 @@ class QubIE(InfoExtractor):
def _real_extract(self, url):
entity_id = self._match_id(url)
- entity = self._download_json(
- 'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
- entity_id, query={'id': entity_id})
+ webpage = self._download_webpage(url, entity_id)
+ entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
video_id = entity['videoId']
episode = strip_or_none(entity.get('name'))
return {
'_type': 'url_transparent',
+ 'url': f'https://videos.tva.ca/details/_{video_id}',
+ 'ie_key': TVAIE.ie_key(),
'id': video_id,
'title': episode,
- # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
- 'url': 'https://videos.tva.ca/details/_' + video_id,
- 'description': entity.get('longDescription'),
- 'duration': float_or_none(entity.get('durationMillis'), 1000),
'episode': episode,
- 'episode_number': int_or_none(entity.get('episodeNumber')),
- # 'ie_key': 'BrightcoveNew',
- 'ie_key': TVAIE.ie_key(),
+ **traverse_obj(entity, {
+ 'description': ('longDescription', {str}),
+ 'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}),
+ 'channel': ('knownEntities', 'channel', 'name', {str}),
+ 'series': ('knownEntities', 'videoShow', 'name', {str}),
+ 'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}),
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ }),
}
diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py
index b9f5e11..dbebda4 100644
--- a/yt_dlp/extractor/tvanouvelles.py
+++ b/yt_dlp/extractor/tvanouvelles.py
@@ -1,7 +1,7 @@
import re
-from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
class TVANouvellesIE(InfoExtractor):
diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py
index 5276813..ac48058 100644
--- a/yt_dlp/extractor/tvn24.py
+++ b/yt_dlp/extractor/tvn24.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
NO_DEFAULT,
+ int_or_none,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
index a8d00e2..f1ebf02 100644
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -4,10 +4,10 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
dict_get,
- ExtractorError,
int_or_none,
js_to_json,
str_or_none,
diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py
index 48a6efe..29185d3 100644
--- a/yt_dlp/extractor/tvplay.py
+++ b/yt_dlp/extractor/tvplay.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_iso8601,
qualities,
diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py
index 228c236..d43bdc2 100644
--- a/yt_dlp/extractor/tvplayer.py
+++ b/yt_dlp/extractor/tvplayer.py
@@ -2,10 +2,10 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
extract_attributes,
try_get,
urlencode_postdata,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py
index e8e1fc6..9249550 100644
--- a/yt_dlp/extractor/tweakers.py
+++ b/yt_dlp/extractor/tweakers.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
determine_ext,
+ int_or_none,
mimetype2ext,
)
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index c55786a..80cba09 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -191,17 +191,25 @@ class TwitchBaseIE(InfoExtractor):
}] if thumbnail else None
def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
- return self._extract_m3u8_formats(
+ formats = self._extract_m3u8_formats(
f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
'allow_source': 'true',
'allow_audio_only': 'true',
'allow_spectre': 'true',
'p': random.randint(1000000, 10000000),
+ 'platform': 'web',
'player': 'twitchweb',
+ 'supported_codecs': 'av1,h265,h264',
'playlist_include_framerate': 'true',
'sig': signature,
'token': token,
})
+ for fmt in formats:
+ if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
+ # mpegts does not yet have proper support for av1
+ fmt['downloader_options'] = {'ffmpeg_args_out': ['-f', 'mp4']}
+
+ return formats
class TwitchVodIE(TwitchBaseIE):
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index ecc8656..1a11162 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -1,10 +1,10 @@
+import functools
import json
import random
import re
from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
-from ..compat import functools # isort: split
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
@@ -34,9 +34,9 @@ from ..utils import (
class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
- _API_BASE = 'https://api.twitter.com/1.1/'
- _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
- _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
+ _API_BASE = 'https://api.x.com/1.1/'
+ _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
_flow_token = None
@@ -153,6 +153,14 @@ class TwitterBaseIE(InfoExtractor):
def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
+ # XXX: Temporary workaround until twitter.com => x.com migration is completed
+ def _real_initialize(self):
+ if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
+ return
+ # User has not yet been migrated to x.com and has passed twitter.com cookies
+ TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
+ TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
+
@functools.cached_property
def _selected_api(self):
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
@@ -196,17 +204,15 @@ class TwitterBaseIE(InfoExtractor):
if self.is_logged_in:
return
- webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
- guest_token = self._search_regex(
- r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
+ guest_token = self._fetch_guest_token(None)
headers = {
**self._set_base_headers(),
'content-type': 'application/json',
'x-guest-token': guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
- 'Referer': 'https://twitter.com/',
- 'Origin': 'https://twitter.com',
+ 'Referer': 'https://x.com/',
+ 'Origin': 'https://x.com',
}
def build_login_json(*subtask_inputs):
@@ -1192,6 +1198,31 @@ class TwitterIE(TwitterBaseIE):
'_old_archive_ids': ['twitter 1724884212803834154'],
},
}, {
+ # x.com
+ 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
+ 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
+ 'info_dict': {
+ 'id': '1790637589910654976',
+ 'ext': 'mp4',
+ 'title': 'Historic Vids - One of the most intense moments in history',
+ 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
+ 'display_id': '1790637656616943991',
+ 'uploader': 'Historic Vids',
+ 'uploader_id': 'historyinmemes',
+ 'uploader_url': 'https://twitter.com/historyinmemes',
+ 'channel_id': '855481986290524160',
+ 'upload_date': '20240515',
+ 'timestamp': 1715756260.0,
+ 'duration': 15.488,
+ 'tags': [],
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1790637656616943991'],
+ }
+ }, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
'only_matching': True,
diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py
index 10668ac..d5849d2 100644
--- a/yt_dlp/extractor/udn.py
+++ b/yt_dlp/extractor/udn.py
@@ -1,12 +1,12 @@
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
determine_ext,
int_or_none,
js_to_json,
)
-from ..compat import compat_urlparse
class UDNEmbedIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py
index f914613..f141804 100644
--- a/yt_dlp/extractor/ukcolumn.py
+++ b/yt_dlp/extractor/ukcolumn.py
@@ -1,11 +1,11 @@
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
from ..utils import (
+ ExtractorError,
unescapeHTML,
urljoin,
- ExtractorError,
)
-from .common import InfoExtractor
-from .vimeo import VimeoIE
-from .youtube import YoutubeIE
class UkColumnIE(InfoExtractor):
diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py
index 4316c31..1e2d118 100644
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@@ -173,6 +173,20 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
r'filemoon\.sx',
r'hentai\.animestigma\.com',
r'thisav\.com',
+ r'gounlimited\.to',
+ r'highstream\.tv',
+ r'uqload\.com',
+ r'vedbam\.xyz',
+ r'vadbam\.net',
+ r'vidlo\.us',
+ r'wolfstream\.tv',
+ r'xvideosharing\.com',
+ r'(?:\w+\.)?viidshar\.com',
+ r'sxyprn\.com',
+ r'jable\.tv',
+ r'91porn\.com',
+ r'einthusan\.(?:tv|com|ca)',
+ r'yourupload\.com',
)
_TESTS = [{
diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py
index 7f97fc9..928e6e1 100644
--- a/yt_dlp/extractor/urplay.py
+++ b/yt_dlp/extractor/urplay.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
- dict_get,
ExtractorError,
- int_or_none,
ISO639Utils,
+ dict_get,
+ int_or_none,
parse_age_limit,
try_get,
unified_timestamp,
diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py
index 3243f3e..42a28c5 100644
--- a/yt_dlp/extractor/usatoday.py
+++ b/yt_dlp/extractor/usatoday.py
@@ -1,4 +1,5 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
get_element_by_attribute,
@@ -6,7 +7,6 @@ from ..utils import (
try_get,
update_url_query,
)
-from ..compat import compat_str
class USATodayIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py
index 5df2416..046e3d7 100644
--- a/yt_dlp/extractor/ustream.py
+++ b/yt_dlp/extractor/ustream.py
@@ -7,10 +7,10 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
- encode_data_uri,
ExtractorError,
- int_or_none,
+ encode_data_uri,
float_or_none,
+ int_or_none,
join_nonempty,
mimetype2ext,
str_or_none,
diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py
index c3aeeb9..f6ce5b3 100644
--- a/yt_dlp/extractor/ustudio.py
+++ b/yt_dlp/extractor/ustudio.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
- unified_strdate,
unescapeHTML,
+ unified_strdate,
)
diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py
index ef44d42..205f8ea 100644
--- a/yt_dlp/extractor/veo.py
+++ b/yt_dlp/extractor/veo.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
mimetype2ext,
diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py
index 3f2dddb..a2e9022 100644
--- a/yt_dlp/extractor/vesti.py
+++ b/yt_dlp/extractor/vesti.py
@@ -1,8 +1,8 @@
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
from .rutv import RUTVIE
+from ..utils import ExtractorError
class VestiIE(InfoExtractor):
diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py
index aa40227..7715d68 100644
--- a/yt_dlp/extractor/vevo.py
+++ b/yt_dlp/extractor/vevo.py
@@ -1,5 +1,5 @@
-import re
import json
+import re
from .common import InfoExtractor
from ..compat import compat_str
diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py
index d31908f..b072d9d 100644
--- a/yt_dlp/extractor/vice.py
+++ b/yt_dlp/extractor/vice.py
@@ -10,10 +10,10 @@ from .youtube import YoutubeIE
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
- int_or_none,
OnDemandPagedList,
+ clean_html,
+ int_or_none,
parse_age_limit,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py
index 770aa28..6322bb0 100644
--- a/yt_dlp/extractor/vidio.py
+++ b/yt_dlp/extractor/vidio.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
format_field,
get_element_by_class,
int_or_none,
diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py
index 44353b7..e1219a8 100644
--- a/yt_dlp/extractor/vidlii.py
+++ b/yt_dlp/extractor/vidlii.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
- format_field,
float_or_none,
+ format_field,
get_element_by_id,
int_or_none,
str_to_int,
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 91b9764..ac96ade 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -1,21 +1,21 @@
import base64
import functools
-import re
import itertools
+import re
from .common import InfoExtractor
from ..compat import compat_str, compat_urlparse
from ..networking import HEADRequest, Request
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
clean_html,
determine_ext,
- ExtractorError,
get_element_by_class,
- js_to_json,
int_or_none,
+ js_to_json,
merge_dicts,
- OnDemandPagedList,
parse_filesize,
parse_iso8601,
parse_qs,
@@ -26,8 +26,8 @@ from ..utils import (
unified_timestamp,
unsmuggle_url,
urlencode_postdata,
- urljoin,
urlhandle_detect_ext,
+ urljoin,
)
diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py
index 6f9af9f..480f49b 100644
--- a/yt_dlp/extractor/viu.py
+++ b/yt_dlp/extractor/viu.py
@@ -1,8 +1,8 @@
-import re
import json
-import uuid
import random
+import re
import urllib.parse
+import uuid
from .common import InfoExtractor
from ..compat import compat_str
@@ -10,10 +10,10 @@ from ..utils import (
ExtractorError,
int_or_none,
remove_end,
+ smuggle_url,
strip_or_none,
traverse_obj,
try_get,
- smuggle_url,
unified_timestamp,
unsmuggle_url,
url_or_none,
diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py
index e4a78c2..9a3c75b 100644
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@@ -20,6 +20,7 @@ from ..utils import (
parse_resolution,
str_or_none,
str_to_int,
+ traverse_obj,
try_call,
unescapeHTML,
unified_timestamp,
@@ -27,7 +28,6 @@ from ..utils import (
url_or_none,
urlencode_postdata,
urljoin,
- traverse_obj,
)
@@ -451,6 +451,7 @@ class VKIE(VKBaseIE):
info_page, 'view count', default=None))
formats = []
+ subtitles = {}
for format_id, format_url in data.items():
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
@@ -462,12 +463,21 @@ class VKIE(VKBaseIE):
formats.append({
'format_id': format_id,
'url': format_url,
+ 'ext': 'mp4',
+ 'source_preference': 1,
'height': height,
})
- elif format_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
+ elif format_id.startswith('hls') and format_id != 'hls_live_playback':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False, live=is_live))
+ m3u8_id=format_id, fatal=False, live=is_live)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id=format_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
elif format_id == 'rtmp':
formats.append({
'format_id': format_id,
@@ -475,7 +485,6 @@ class VKIE(VKBaseIE):
'ext': 'flv',
})
- subtitles = {}
for sub in data.get('subs') or {}:
subtitles.setdefault(sub.get('lang', 'en'), []).append({
'ext': sub.get('title', '.srt').split('.')[-1],
@@ -496,6 +505,7 @@ class VKIE(VKBaseIE):
'comment_count': int_or_none(mv_data.get('commcount')),
'is_live': is_live,
'subtitles': subtitles,
+ '_format_sort_fields': ('res', 'source'),
}
@@ -707,6 +717,7 @@ class VKWallPostIE(VKBaseIE):
class VKPlayBaseIE(InfoExtractor):
+ _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/'
_RESOLUTIONS = {
'tiny': '256x144',
'lowest': '426x240',
@@ -765,7 +776,7 @@ class VKPlayBaseIE(InfoExtractor):
class VKPlayIE(VKPlayBaseIE):
- _VALID_URL = r'https?://vkplay\.live/(?P<username>[^/#?]+)/record/(?P<id>[a-f0-9-]+)'
+ _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<username>[^/#?]+)/record/(?P<id>[\da-f-]+)'
_TESTS = [{
'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
'info_dict': {
@@ -776,13 +787,16 @@ class VKPlayIE(VKPlayBaseIE):
'uploader_id': '13159830',
'release_timestamp': 1683461378,
'release_date': '20230507',
- 'thumbnail': r're:https://images.vkplay.live/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview\?change_time=\d+',
+ 'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview',
'duration': 10608,
'view_count': int,
'like_count': int,
'categories': ['Atomic Heart'],
},
'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -802,7 +816,7 @@ class VKPlayIE(VKPlayBaseIE):
class VKPlayLiveIE(VKPlayBaseIE):
- _VALID_URL = r'https?://vkplay\.live/(?P<id>[^/#?]+)/?(?:[#?]|$)'
+ _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<id>[^/#?]+)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://vkplay.live/bayda',
'info_dict': {
@@ -813,7 +827,7 @@ class VKPlayLiveIE(VKPlayBaseIE):
'uploader_id': '12279401',
'release_timestamp': 1687209962,
'release_date': '20230619',
- 'thumbnail': r're:https://images.vkplay.live/public_video_stream/12279401/preview\?change_time=\d+',
+ 'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview',
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
@@ -822,6 +836,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
},
'skip': 'livestream',
'params': {'skip_download': True},
+ }, {
+ 'url': 'https://live.vkplay.ru/lebwa',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py
deleted file mode 100644
index ef77bed..0000000
--- a/yt_dlp/extractor/voot.py
+++ /dev/null
@@ -1,212 +0,0 @@
-import json
-import time
-import uuid
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..networking.exceptions import HTTPError
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- jwt_decode_hs256,
- parse_age_limit,
- traverse_obj,
- try_call,
- try_get,
- unified_strdate,
-)
-
-
-class VootBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'voot'
- _GEO_BYPASS = False
- _LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.'
- _TOKEN = None
- _EXPIRY = 0
- _API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'}
-
- def _perform_login(self, username, password):
- if self._TOKEN and self._EXPIRY:
- return
-
- if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
- VootBaseIE._TOKEN = password
- VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp']
- self.report_login()
-
- # Mobile number as username is not supported
- elif not username.isdigit():
- check_username = self._download_json(
- 'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({
- 'type': 'email',
- 'email': username
- }, separators=(',', ':')).encode(), headers={
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- }, note='Checking username', expected_status=403)
- if not traverse_obj(check_username, ('isExist', {bool})):
- if traverse_obj(check_username, ('status', 'code', {int})) == 9999:
- self.raise_geo_restricted(countries=['IN'])
- raise ExtractorError('Incorrect username', expected=True)
- auth_token = traverse_obj(self._download_json(
- 'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({
- 'type': 'traditional',
- 'deviceId': str(uuid.uuid4()),
- 'deviceBrand': 'PC/MAC',
- 'data': {
- 'email': username,
- 'password': password
- }
- }, separators=(',', ':')).encode(), headers={
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- }, note='Logging in', expected_status=400), ('data', 'authToken', {dict}))
- if not auth_token:
- raise ExtractorError('Incorrect password', expected=True)
- VootBaseIE._TOKEN = auth_token['accessToken']
- VootBaseIE._EXPIRY = auth_token['expirationTime']
-
- else:
- raise ExtractorError(self._LOGIN_HINT, expected=True)
-
- def _check_token_expiry(self):
- if int(time.time()) >= self._EXPIRY:
- raise ExtractorError('Access token has expired', expected=True)
-
- def _real_initialize(self):
- if not self._TOKEN:
- self.raise_login_required(self._LOGIN_HINT, method=None)
- self._check_token_expiry()
-
-
-class VootIE(VootBaseIE):
- _WORKING = False
- _VALID_URL = r'''(?x)
- (?:
- voot:|
- https?://(?:www\.)?voot\.com/?
- (?:
- movies?/[^/]+/|
- (?:shows|kids)/(?:[^/]+/){4}
- )
- )
- (?P<id>\d{3,})
- '''
- _TESTS = [{
- 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
- 'info_dict': {
- 'id': '441353',
- 'ext': 'mp4',
- 'title': 'Is this the end of Kamini?',
- 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
- 'timestamp': 1472103000,
- 'upload_date': '20160825',
- 'series': 'Ishq Ka Rang Safed',
- 'season_number': 1,
- 'episode': 'Is this the end of Kamini?',
- 'episode_number': 340,
- 'release_date': '20160825',
- 'season': 'Season 1',
- 'age_limit': 13,
- 'duration': 1146.0,
- },
- 'params': {'skip_download': 'm3u8'},
- }, {
- 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
- 'only_matching': True,
- }, {
- 'url': 'https://www.voot.com/movies/pandavas-5/424627',
- 'only_matching': True,
- }, {
- 'url': 'https://www.voot.com/movie/fight-club/621842',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- media_info = self._download_json(
- 'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id,
- query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN})
-
- try:
- m3u8_url = self._download_json(
- 'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id,
- 'Downloading playback JSON', data=b'{}', headers={
- **self.geo_verification_headers(),
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- 'platform': 'androidwebdesktop',
- 'vootid': video_id,
- 'voottoken': self._TOKEN,
- })['m3u8']
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status == 400:
- self._check_token_expiry()
- raise
-
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._remove_duplicate_formats(formats)
-
- return {
- 'id': video_id,
- # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
- 'formats': traverse_obj(formats, (
- lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
- 'http_headers': self._API_HEADERS,
- **traverse_obj(media_info, ('result', 0, {
- 'title': ('fullTitle', {str}),
- 'description': ('fullSynopsis', {str}),
- 'series': ('showName', {str}),
- 'season_number': ('season', {int_or_none}),
- 'episode': ('fullTitle', {str}),
- 'episode_number': ('episode', {int_or_none}),
- 'timestamp': ('uploadTime', {int_or_none}),
- 'release_date': ('telecastDate', {unified_strdate}),
- 'age_limit': ('ageNemonic', {parse_age_limit}),
- 'duration': ('duration', {float_or_none}),
- })),
- }
-
-
-class VootSeriesIE(VootBaseIE):
- _WORKING = False
- _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
- _TESTS = [{
- 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',
- 'playlist_mincount': 442,
- 'info_dict': {
- 'id': '100002',
- },
- }, {
- 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003',
- 'playlist_mincount': 341,
- 'info_dict': {
- 'id': '100003',
- },
- }]
- _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common'
- _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}'
-
- def _entries(self, show_id):
- show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id)
- for season in show_json.get('result', []):
- page_num = 1
- season_id = try_get(season, lambda x: x['id'], compat_str)
- season_json = self._download_json(self._SEASON_API.format(season_id, page_num),
- video_id=season_id,
- note='Downloading JSON metadata page %d' % page_num)
- episodes_json = season_json.get('result', [])
- while episodes_json:
- page_num += 1
- for episode in episodes_json:
- video_id = episode.get('id')
- yield self.url_result(
- 'voot:%s' % video_id, ie=VootIE.ie_key(), video_id=video_id)
- episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num),
- video_id=season_id,
- note='Downloading JSON metadata page %d' % page_num)['result']
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
- return self.playlist_result(self._entries(show_id), playlist_id=show_id)
diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py
index 497233d..3d26549 100644
--- a/yt_dlp/extractor/vrt.py
+++ b/yt_dlp/extractor/vrt.py
@@ -16,6 +16,7 @@ from ..utils import (
join_nonempty,
jwt_encode_hs256,
make_archive_id,
+ merge_dicts,
parse_age_limit,
parse_iso8601,
str_or_none,
@@ -425,3 +426,64 @@ class DagelijkseKostIE(VRTBaseIE):
['description', 'twitter:description', 'og:description'], webpage),
'_old_archive_ids': [make_archive_id('Canvas', video_id)],
}
+
+
+class Radio1BeIE(VRTBaseIE):
+ _VALID_URL = r'https?://radio1\.be/(?:lees|luister/select)/(?P<id>[\w/-]+)'
+ _TESTS = [{
+ 'url': 'https://radio1.be/luister/select/de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
+ 'info_dict': {
+ 'id': 'eb6c22e9-544f-44f4-af39-cf8cccd29e22',
+ 'title': 'Komt N-VA volgend jaar op in Wallonië?',
+ 'display_id': 'de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
+ 'description': 'md5:b374ea1c9302f38362df9dea1931468e',
+ 'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+'
+ },
+ 'playlist_mincount': 1
+ }, {
+ 'url': 'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web',
+ 'info_dict': {
+ 'id': '5d47f102-dbdb-4fa0-832b-26c1870311f2',
+ 'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza',
+ 'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64',
+ 'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+',
+ 'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza'
+ },
+ 'playlist_mincount': 1
+ }]
+
+ def _extract_video_entries(self, next_js_data, display_id):
+ video_data = traverse_obj(
+ next_js_data, ((None, ('paragraphs', ...)), {lambda x: x if x['mediaReference'] else None}))
+ for data in video_data:
+ media_reference = data['mediaReference']
+ formats, subtitles = self._extract_formats_and_subtitles(
+ self._call_api(media_reference), display_id)
+
+ yield {
+ 'id': media_reference,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('body', {clean_html})
+ }),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['item']
+
+ return self.playlist_result(
+ self._extract_video_entries(next_js_data, display_id), **merge_dicts(traverse_obj(
+ next_js_data, ({
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'description': (('description', 'content'), {clean_html}),
+ }), get_all=False), {
+ 'display_id': display_id,
+ 'title': self._html_search_meta(['name', 'og:title', 'twitter:title'], webpage),
+ 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
+ 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
+ }))
diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py
index a1a9c17..3ac0f83 100644
--- a/yt_dlp/extractor/walla.py
+++ b/yt_dlp/extractor/walla.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- xpath_text,
int_or_none,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py
index 74501b1..1cfed2d 100644
--- a/yt_dlp/extractor/washingtonpost.py
+++ b/yt_dlp/extractor/washingtonpost.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import traverse_obj
diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py
index f80f140..0b7ddd2 100644
--- a/yt_dlp/extractor/wdr.py
+++ b/yt_dlp/extractor/wdr.py
@@ -6,16 +6,16 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
+ ExtractorError,
determine_ext,
dict_get,
- ExtractorError,
js_to_json,
strip_jsonp,
try_get,
unified_strdate,
update_url_query,
- urlhandle_detect_ext,
url_or_none,
+ urlhandle_detect_ext,
)
diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py
index 2fca745..b6a6593 100644
--- a/yt_dlp/extractor/weibo.py
+++ b/yt_dlp/extractor/weibo.py
@@ -1,6 +1,6 @@
+import itertools
import json
import random
-import itertools
import urllib.parse
from .common import InfoExtractor
diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py
index f2808cd..492891d 100644
--- a/yt_dlp/extractor/whowatch.py
+++ b/yt_dlp/extractor/whowatch.py
@@ -1,12 +1,12 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ ExtractorError,
int_or_none,
qualities,
try_call,
try_get,
- ExtractorError,
)
-from ..compat import compat_str
class WhoWatchIE(InfoExtractor):
diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py
index f9bf092..d7d77c0 100644
--- a/yt_dlp/extractor/wimtv.py
+++ b/yt_dlp/extractor/wimtv.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
parse_duration,
urlencode_postdata,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py
index bce5e83..f2256fd 100644
--- a/yt_dlp/extractor/wistia.py
+++ b/yt_dlp/extractor/wistia.py
@@ -1,6 +1,6 @@
+import base64
import re
import urllib.parse
-from base64 import b64decode
from .common import InfoExtractor
from ..networking import HEADRequest
@@ -371,7 +371,7 @@ class WistiaChannelIE(WistiaBaseIE):
webpage = self._download_webpage(f'https://fast.wistia.net/embed/channel/{channel_id}', channel_id)
data = self._parse_json(
self._search_regex(r'wchanneljsonp-%s\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)' % channel_id, webpage, 'jsonp', channel_id),
- channel_id, transform_source=lambda x: urllib.parse.unquote_plus(b64decode(x).decode('utf-8')))
+ channel_id, transform_source=lambda x: urllib.parse.unquote_plus(base64.b64decode(x).decode('utf-8')))
# XXX: can there be more than one series?
series = traverse_obj(data, ('series', 0), default={})
diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py
index 5e590e2..0ef4e8e 100644
--- a/yt_dlp/extractor/wppilot.py
+++ b/yt_dlp/extractor/wppilot.py
@@ -1,13 +1,13 @@
+import json
+import random
+import re
+
from .common import InfoExtractor
from ..utils import (
- try_get,
ExtractorError,
+ try_get,
)
-import json
-import random
-import re
-
class WPPilotBaseIE(InfoExtractor):
_VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py
index 145246a..d401d6d 100644
--- a/yt_dlp/extractor/wrestleuniverse.py
+++ b/yt_dlp/extractor/wrestleuniverse.py
@@ -12,6 +12,7 @@ from ..utils import (
jwt_decode_hs256,
traverse_obj,
try_call,
+ url_basename,
url_or_none,
urlencode_postdata,
variadic,
@@ -147,7 +148,7 @@ class WrestleUniverseBaseIE(InfoExtractor):
metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
if not metadata:
webpage = self._download_webpage(url, video_id)
- nextjs_data = self._search_nextjs_data(webpage, video_id)
+ nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
metadata = traverse_obj(nextjs_data, (
'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
return metadata
@@ -194,8 +195,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):
return {
'id': video_id,
- 'formats': self._get_formats(video_data, (
- (('protocolHls', 'url'), ('chromecastUrls', ...)), {url_or_none}), video_id),
+ 'formats': self._get_formats(video_data, ('protocolHls', 'url', {url_or_none}), video_id),
**traverse_obj(metadata, {
'title': ('displayName', {str}),
'description': ('description', {str}),
@@ -259,6 +259,10 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
'params': {
'skip_download': 'm3u8',
},
+ }, {
+ 'note': 'manifest provides live-a (partial) and live-b (full) streams',
+ 'url': 'https://www.wrestle-universe.com/en/lives/umc99R9XsexXrxr9VjTo9g',
+ 'only_matching': True,
}]
_API_PATH = 'events'
@@ -285,12 +289,16 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
video_data, decrypt = self._call_encrypted_api(
video_id, ':watchArchive', 'watch archive', data={'method': 1})
- info['formats'] = self._get_formats(video_data, (
- ('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
+ # 'chromecastUrls' can be only partial videos, avoid
+ info['formats'] = self._get_formats(video_data, ('hls', (('urls', ...), 'url'), {url_or_none}), video_id)
for f in info['formats']:
# bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
if f.get('tbr'):
f['tbr'] = int(f['tbr'] / 2.5)
+ # prefer variants with the same basename as the master playlist to avoid partial streams
+ f['format_id'] = url_basename(f['url']).partition('.')[0]
+ if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
+ f['preference'] = -10
hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
if hls_aes_key:
diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py
index 86e2646..35fe303 100644
--- a/yt_dlp/extractor/wsj.py
+++ b/yt_dlp/extractor/wsj.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
unified_strdate,
)
diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py
deleted file mode 100644
index 08c6d6c..0000000
--- a/yt_dlp/extractor/xfileshare.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- decode_packed_codes,
- determine_ext,
- int_or_none,
- js_to_json,
- urlencode_postdata,
-)
-
-
-# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
-def aa_decode(aa_code):
- symbol_table = [
- ('7', '((゚ー゚) + (o^_^o))'),
- ('6', '((o^_^o) +(o^_^o))'),
- ('5', '((゚ー゚) + (゚Θ゚))'),
- ('2', '((o^_^o) - (゚Θ゚))'),
- ('4', '(゚ー゚)'),
- ('3', '(o^_^o)'),
- ('1', '(゚Θ゚)'),
- ('0', '(c^_^o)'),
- ]
- delim = '(゚Д゚)[゚ε゚]+'
- ret = ''
- for aa_char in aa_code.split(delim):
- for val, pat in symbol_table:
- aa_char = aa_char.replace(pat, val)
- aa_char = aa_char.replace('+ ', '')
- m = re.match(r'^\d+', aa_char)
- if m:
- ret += chr(int(m.group(0), 8))
- else:
- m = re.match(r'^u([\da-f]+)', aa_char)
- if m:
- ret += chr(int(m.group(1), 16))
- return ret
-
-
-class XFileShareIE(InfoExtractor):
- _SITES = (
- (r'aparat\.cam', 'Aparat'),
- (r'clipwatching\.com', 'ClipWatching'),
- (r'gounlimited\.to', 'GoUnlimited'),
- (r'govid\.me', 'GoVid'),
- (r'holavid\.com', 'HolaVid'),
- (r'streamty\.com', 'Streamty'),
- (r'thevideobee\.to', 'TheVideoBee'),
- (r'uqload\.com', 'Uqload'),
- (r'vidbom\.com', 'VidBom'),
- (r'vidlo\.us', 'vidlo'),
- (r'vidlocker\.xyz', 'VidLocker'),
- (r'vidshare\.tv', 'VidShare'),
- (r'vup\.to', 'VUp'),
- (r'wolfstream\.tv', 'WolfStream'),
- (r'xvideosharing\.com', 'XVideoSharing'),
- )
-
- IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
- _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
- % '|'.join(site for site in list(zip(*_SITES))[0]))
- _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
-
- _FILE_NOT_FOUND_REGEXES = (
- r'>(?:404 - )?File Not Found<',
- r'>The file was removed by administrator<',
- )
-
- _TESTS = [{
- 'url': 'https://uqload.com/dltx1wztngdz',
- 'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
- 'info_dict': {
- 'id': 'dltx1wztngdz',
- 'ext': 'mp4',
- 'title': 'Rick Astley Never Gonna Give You mp4',
- 'thumbnail': r're:https://.*\.jpg'
- }
- }, {
- 'url': 'http://xvideosharing.com/fq65f94nd2ve',
- 'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
- 'info_dict': {
- 'id': 'fq65f94nd2ve',
- 'ext': 'mp4',
- 'title': 'sample',
- 'thumbnail': r're:http://.*\.jpg',
- },
- }, {
- 'url': 'https://aparat.cam/n4d6dh0wvlpr',
- 'only_matching': True,
- }, {
- 'url': 'https://wolfstream.tv/nthme29v9u2x',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- host, video_id = self._match_valid_url(url).groups()
-
- url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
- webpage = self._download_webpage(url, video_id)
-
- if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- fields = self._hidden_inputs(webpage)
-
- if fields.get('op') == 'download1':
- countdown = int_or_none(self._search_regex(
- r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
- webpage, 'countdown', default=None))
- if countdown:
- self._sleep(countdown, video_id)
-
- webpage = self._download_webpage(
- url, video_id, 'Downloading video page',
- data=urlencode_postdata(fields), headers={
- 'Referer': url,
- 'Content-type': 'application/x-www-form-urlencoded',
- })
-
- title = (self._search_regex(
- (r'style="z-index: [0-9]+;">([^<]+)</span>',
- r'<td nowrap>([^<]+)</td>',
- r'h4-fine[^>]*>([^<]+)<',
- r'>Watch (.+)[ <]',
- r'<h2 class="video-page-head">([^<]+)</h2>',
- r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
- r'title\s*:\s*"([^"]+)"'), # govid.me
- webpage, 'title', default=None) or self._og_search_title(
- webpage, default=None) or video_id).strip()
-
- for regex, func in (
- (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
- (r'(゚.+)', aa_decode)):
- obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
- if obf_code:
- webpage = webpage.replace(obf_code, func(obf_code))
-
- formats = []
-
- jwplayer_data = self._search_regex(
- [
- r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
- r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
- ], webpage,
- 'jwplayer data', default=None)
- if jwplayer_data:
- jwplayer_data = self._parse_json(
- jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
- if jwplayer_data:
- formats = self._parse_jwplayer_data(
- jwplayer_data, video_id, False,
- m3u8_id='hls', mpd_id='dash')['formats']
-
- if not formats:
- urls = []
- for regex in (
- r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
- r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
- r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
- r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
- for mobj in re.finditer(regex, webpage):
- video_url = mobj.group('url')
- if video_url not in urls:
- urls.append(video_url)
-
- sources = self._search_regex(
- r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
- if sources:
- urls.extend(self._parse_json(sources, video_id))
-
- formats = []
- for video_url in urls:
- if determine_ext(video_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': 'sd',
- })
-
- thumbnail = self._search_regex(
- [
- r'<video[^>]+poster="([^"]+)"',
- r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
- ], webpage, 'thumbnail', default=None)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- 'http_headers': {'Referer': url}
- }
diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py
index 01ac5dd..0b3a620 100644
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -4,11 +4,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
dict_get,
extract_attributes,
- ExtractorError,
float_or_none,
int_or_none,
parse_duration,
diff --git a/yt_dlp/extractor/xiaohongshu.py b/yt_dlp/extractor/xiaohongshu.py
new file mode 100644
index 0000000..faad9d9
--- /dev/null
+++ b/yt_dlp/extractor/xiaohongshu.py
@@ -0,0 +1,83 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class XiaoHongShuIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.xiaohongshu\.com/explore/(?P<id>[\da-f]+)'
+ IE_DESC = '小红书'
+ _TESTS = [{
+ 'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9',
+ 'md5': '2a87a77ddbedcaeeda8d7eae61b61228',
+ 'info_dict': {
+ 'id': '6411cf99000000001300b6d9',
+ 'ext': 'mp4',
+ 'uploader_id': '5c31698d0000000007018a31',
+ 'description': '#今日快乐今日发[话题]# #吃货薯看这里[话题]# #香妃蛋糕[话题]# #小五卷蛋糕[话题]# #新手蛋糕卷[话题]#',
+ 'title': '香妃蛋糕也太香了吧🔥不需要卷❗️绝对的友好',
+ 'tags': ['今日快乐今日发', '吃货薯看这里', '香妃蛋糕', '小五卷蛋糕', '新手蛋糕卷'],
+ 'duration': 101.726,
+ 'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ initial_state = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json)
+
+ note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note'))
+ video_info = traverse_obj(note_info, ('video', 'media', 'stream', ('h264', 'av1', 'h265'), ...))
+
+ formats = []
+ for info in video_info:
+ format_info = traverse_obj(info, {
+ 'fps': ('fps', {int_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'vcodec': ('videoCodec', {str}),
+ 'acodec': ('audioCodec', {str}),
+ 'abr': ('audioBitrate', {int_or_none}),
+ 'vbr': ('videoBitrate', {int_or_none}),
+ 'audio_channels': ('audioChannels', {int_or_none}),
+ 'tbr': ('avgBitrate', {int_or_none}),
+ 'format': ('qualityType', {str}),
+ 'filesize': ('size', {int_or_none}),
+ 'duration': ('duration', {functools.partial(float_or_none, scale=1000)})
+ })
+
+ formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), {
+ lambda u: url_or_none(u) and {'url': u, **format_info}})))
+
+ thumbnails = []
+ for image_info in traverse_obj(note_info, ('imageList', ...)):
+ thumbnail_info = traverse_obj(image_info, {
+ 'height': ('height', {int_or_none}),
+ 'width': ('width', {int_or_none}),
+ })
+ for thumb_url in traverse_obj(image_info, (('urlDefault', 'urlPre'), {url_or_none})):
+ thumbnails.append({
+ 'url': thumb_url,
+ **thumbnail_info,
+ })
+
+ return {
+ 'id': display_id,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'title': self._html_search_meta(['og:title'], webpage, default=None),
+ **traverse_obj(note_info, {
+ 'title': ('title', {str}),
+ 'description': ('desc', {str}),
+ 'tags': ('tagList', ..., 'name', {str}),
+ 'uploader_id': ('user', 'userId', {str}),
+ }),
+ }
diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py
index 1452aae..74d4f04 100644
--- a/yt_dlp/extractor/xnxx.py
+++ b/yt_dlp/extractor/xnxx.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ NO_DEFAULT,
determine_ext,
int_or_none,
- NO_DEFAULT,
str_to_int,
)
diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py
index 8dd1cd9..322e865 100644
--- a/yt_dlp/extractor/xstream.py
+++ b/yt_dlp/extractor/xstream.py
@@ -2,11 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
+ find_xpath_attr,
int_or_none,
parse_iso8601,
- xpath_with_ns,
xpath_text,
- find_xpath_attr,
+ xpath_with_ns,
)
diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py
index 5df0715..6b16ac2 100644
--- a/yt_dlp/extractor/xvideos.py
+++ b/yt_dlp/extractor/xvideos.py
@@ -3,9 +3,9 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
int_or_none,
parse_duration,
)
@@ -15,35 +15,35 @@ class XVideosIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:[^/]+\.)?xvideos2?\.com/video|
- (?:www\.)?xvideos\.es/video|
+ (?:[^/]+\.)?xvideos2?\.com/video\.?|
+ (?:www\.)?xvideos\.es/video\.?|
(?:www|flashservice)\.xvideos\.com/embedframe/|
static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
)
- (?P<id>[0-9]+)
+ (?P<id>[0-9a-z]+)
'''
_TESTS = [{
- 'url': 'https://www.xvideos.com/video4588838/motorcycle_guy_cucks_influencer_steals_his_gf',
- 'md5': '14cea69fcb84db54293b1e971466c2e1',
+ 'url': 'http://xvideos.com/video.ucuvbkfda4e/a_beautiful_red-haired_stranger_was_refused_but_still_came_to_my_room_for_sex',
+ 'md5': '396255a900a6bddb3e98985f0b86c3fd',
'info_dict': {
- 'id': '4588838',
+ 'id': 'ucuvbkfda4e',
'ext': 'mp4',
- 'title': 'Motorcycle Guy Cucks Influencer, Steals his GF',
- 'duration': 108,
+ 'title': 'A Beautiful Red-Haired Stranger Was Refused, But Still Came To My Room For Sex',
+ 'duration': 1238,
'age_limit': 18,
- 'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
+ 'thumbnail': r're:^https://cdn\d+-pic.xvideos-cdn.com/.+\.jpg',
}
}, {
# Broken HLS formats
'url': 'https://www.xvideos.com/video65982001/what_s_her_name',
- 'md5': 'b82d7d7ef7d65a84b1fa6965f81f95a5',
+ 'md5': '56742808292c8fa1418e4538c262c58b',
'info_dict': {
'id': '65982001',
'ext': 'mp4',
'title': 'what\'s her name?',
'duration': 120,
'age_limit': 18,
- 'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
+ 'thumbnail': r're:^https://cdn\d+-pic.xvideos-cdn.com/.+\.jpg',
}
}, {
'url': 'https://flashservice.xvideos.com/embedframe/4588838',
@@ -90,6 +90,18 @@ class XVideosIE(InfoExtractor):
}, {
'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
'only_matching': True
+ }, {
+ 'url': 'https://flashservice.xvideos.com/embedframe/ucuvbkfda4e',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.xvideos.com/embedframe/ucuvbkfda4e',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=ucuvbkfda4e',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xvideos.es/video.ucuvbkfda4e/a_beautiful_red-haired_stranger_was_refused_but_still_came_to_my_room_for_sex',
+ 'only_matching': True
}]
def _real_extract(self, url):
@@ -161,8 +173,41 @@ class XVideosIE(InfoExtractor):
class XVideosQuickiesIE(InfoExtractor):
IE_NAME = 'xvideos:quickies'
- _VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?P<domain>(?:[^/?#]+\.)?xvideos2?\.com)/(?:profiles/|amateur-channels/)?[^/?#]+#quickies/a/(?P<id>\w+)'
_TESTS = [{
+ 'url': 'https://www.xvideos.com/lili_love#quickies/a/ipdtikh1a4c',
+ 'md5': 'f9e4f518ff1de14b99a400bbd0fc5ee0',
+ 'info_dict': {
+ 'id': 'ipdtikh1a4c',
+ 'ext': 'mp4',
+ 'title': 'Mexican chichóna putisima',
+ 'age_limit': 18,
+ 'duration': 81,
+ 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
+ 'url': 'https://www.xvideos.com/profiles/lili_love#quickies/a/ipphaob6fd1',
+ 'md5': '5340938aac6b46e19ebdd1d84535862e',
+ 'info_dict': {
+ 'id': 'ipphaob6fd1',
+ 'ext': 'mp4',
+ 'title': 'Puta chichona mexicana squirting',
+ 'age_limit': 18,
+ 'duration': 56,
+ 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
+ 'url': 'https://www.xvideos.com/amateur-channels/lili_love#quickies/a/hfmffmd7661',
+ 'md5': '92428518bbabcb4c513e55922e022491',
+ 'info_dict': {
+ 'id': 'hfmffmd7661',
+ 'ext': 'mp4',
+ 'title': 'Chichona mexican slut',
+ 'age_limit': 18,
+ 'duration': 9,
+ 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
'md5': '16e322a93282667f1963915568f782c1',
'info_dict': {
@@ -177,4 +222,4 @@ class XVideosQuickiesIE(InfoExtractor):
def _real_extract(self, url):
domain, id_ = self._match_valid_url(url).group('domain', 'id')
- return self.url_result(f'https://{domain}/video{id_}/_', XVideosIE, id_)
+ return self.url_result(f'https://{domain}/video{"" if id_.isdecimal() else "."}{id_}/_', XVideosIE, id_)
diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py
index e3e3a9f..aa6c84d 100644
--- a/yt_dlp/extractor/xxxymovies.py
+++ b/yt_dlp/extractor/xxxymovies.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- parse_duration,
int_or_none,
+ parse_duration,
)
diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py
index 794dc3e..acfe69b 100644
--- a/yt_dlp/extractor/yandexmusic.py
+++ b/yt_dlp/extractor/yandexmusic.py
@@ -5,8 +5,8 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
+ int_or_none,
try_get,
)
diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py
index 4382a56..95a9446 100644
--- a/yt_dlp/extractor/yandexvideo.py
+++ b/yt_dlp/extractor/yandexvideo.py
@@ -259,15 +259,15 @@ class ZenYandexIE(InfoExtractor):
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
data_json = self._search_json(
r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
- serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)',
- webpage, 'server state').replace('State', 'Settings')
+ serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
webpage, 'uploader', default='<a>')
uploader_name = extract_attributes(uploader).get('aria-label')
- video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict)
- stream_urls = try_get(video_json, lambda x: x['video']['streams'])
+ item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
+ video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
+
formats, subtitles = [], {}
- for s_url in stream_urls:
+ for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
ext = determine_ext(s_url)
if ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py
index 6ee0abc..0e047aa 100644
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -1,19 +1,27 @@
+import itertools
import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ clean_html,
extract_attributes,
+ get_element_by_class,
+ get_element_by_id,
+ get_elements_html_by_class,
int_or_none,
merge_dicts,
- str_to_int,
+ parse_count,
+ parse_qs,
traverse_obj,
unified_strdate,
url_or_none,
+ urljoin,
)
class YouPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?/?(?:[#?]|$)'
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
_TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
@@ -34,7 +42,7 @@ class YouPornIE(InfoExtractor):
'tags': list,
'age_limit': 18,
},
- 'skip': 'This video has been disabled',
+ 'skip': 'This video has been deactivated',
}, {
# Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@@ -72,15 +80,14 @@ class YouPornIE(InfoExtractor):
'id': '16290308',
'age_limit': 18,
'categories': [],
- 'description': 'md5:00ea70f642f431c379763c17c2f396bc',
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
- 'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
- 'timestamp': 1606089600,
+ 'thumbnail': r're:https://.+\.jpg',
+ 'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
@@ -88,11 +95,27 @@ class YouPornIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
- definitions = self._download_json(
- f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+ self._set_cookie('.youporn.com', 'age_verified', '1')
+ webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
- def get_format_data(data, f):
- return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+ watchable = self._search_regex(
+ r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
+ webpage, 'watchability', default=None)
+ if not watchable:
+ msg = re.split(r'\s{2}', clean_html(get_element_by_id('mainContent', webpage)) or '')[0]
+ raise ExtractorError(
+ f'{self.IE_NAME} says: {msg}' if msg else 'Video unavailable', expected=True)
+
+ player_vars = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)
+ definitions = player_vars['mediaDefinitions']
+
+ def get_format_data(data, stream_type):
+ info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
+ if not info_url:
+ return []
+ return traverse_obj(
+ self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
+ lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
formats = []
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@@ -123,10 +146,6 @@ class YouPornIE(InfoExtractor):
f['height'] = height
formats.append(f)
- webpage = self._download_webpage(
- 'http://www.youporn.com/watch/%s' % video_id, display_id,
- headers={'Cookie': 'age_verified=1'})
-
title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title(
@@ -141,8 +160,10 @@ class YouPornIE(InfoExtractor):
thumbnail = self._search_regex(
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
webpage, 'thumbnail', fatal=False, group='thumbnail')
- duration = int_or_none(self._html_search_meta(
- 'video:duration', webpage, 'duration', fatal=False))
+ duration = traverse_obj(player_vars, ('duration', {int_or_none}))
+ if duration is None:
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration', fatal=False))
uploader = self._html_search_regex(
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
@@ -158,11 +179,11 @@ class YouPornIE(InfoExtractor):
view_count = None
views = self._search_regex(
- r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
- 'views', default=None)
+ r'(<div [^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
+ webpage, 'views', default=None)
if views:
- view_count = str_to_int(extract_attributes(views).get('data-value'))
- comment_count = str_to_int(self._search_regex(
+ view_count = parse_count(extract_attributes(views).get('data-value'))
+ comment_count = parse_count(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', default=None))
@@ -180,7 +201,8 @@ class YouPornIE(InfoExtractor):
data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
data.pop('url', None)
- return merge_dicts(data, {
+
+ result = merge_dicts(data, {
'id': video_id,
'display_id': display_id,
'title': title,
@@ -196,3 +218,350 @@ class YouPornIE(InfoExtractor):
'age_limit': age_limit,
'formats': formats,
})
+
+ # Remove SEO spam "description"
+ description = result.get('description')
+ if description and description.startswith(f'Watch {result.get("title")} online'):
+ del result['description']
+
+ return result
+
+
+class YouPornListBase(InfoExtractor):
+ def _get_next_url(self, url, pl_id, html):
+ return urljoin(url, self._search_regex(
+ r'''<a [^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
+ get_element_by_id('next', html) or '', 'next page',
+ group='url', default=None))
+
+ @classmethod
+ def _get_title_from_slug(cls, title_slug):
+ return re.sub(r'[_-]', ' ', title_slug)
+
+ def _entries(self, url, pl_id, html=None, page_num=None):
+ start = page_num or 1
+ for page in itertools.count(start):
+ if not html:
+ html = self._download_webpage(
+ url, pl_id, note=f'Downloading page {page}', fatal=page == start)
+ if not html:
+ return
+ for element in get_elements_html_by_class('video-title', html):
+ if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})):
+ yield self.url_result(video_url)
+
+ if page_num is not None:
+ return
+ next_url = self._get_next_url(url, pl_id, html)
+ if not next_url or next_url == url:
+ return
+ url = next_url
+ html = None
+
+ def _real_extract(self, url, html=None):
+ m_dict = self._match_valid_url(url).groupdict()
+ pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
+ qs = {k: v[-1] for k, v in parse_qs(url).items() if v}
+
+ base_id = pl_id or 'YouPorn'
+ title = self._get_title_from_slug(base_id)
+ if page_type:
+ title = f'{page_type.capitalize()} {title}'
+ base_id = [base_id.lower()]
+ if sort is None:
+ title += ' videos'
+ else:
+ title = f'{title} videos by {re.sub(r"[_-]", " ", sort)}'
+ base_id.append(sort)
+ if qs:
+ filters = list(map('='.join, sorted(qs.items())))
+ title += f' ({",".join(filters)})'
+ base_id.extend(filters)
+ pl_id = '/'.join(base_id)
+
+ return self.playlist_result(
+ self._entries(url, pl_id, html=html, page_num=int_or_none(qs.get('page'))),
+ playlist_id=pl_id, playlist_title=title)
+
+
+class YouPornCategoryIE(YouPornListBase):
+ IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>category)/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/category/popular-with-women/popular/',
+ 'info_dict': {
+ 'id': 'popular-with-women/popular',
+ 'title': 'Category popular with women videos by popular',
+ },
+ 'playlist_mincount': 39,
+ }, {
+ 'note': 'Filtered paginated list with single page result',
+ 'url': 'https://www.youporn.com/category/popular-with-women/duration/?min_minutes=10',
+ 'info_dict': {
+ 'id': 'popular-with-women/duration/min_minutes=10',
+ 'title': 'Category popular with women videos by duration (min_minutes=10)',
+ },
+ 'playlist_mincount': 2,
+ # 'playlist_maxcount': 30,
+ }, {
+ 'note': 'Single page of full list',
+ 'url': 'https://www.youporn.com/category/popular-with-women/popular?page=1',
+ 'info_dict': {
+ 'id': 'popular-with-women/popular/page=1',
+ 'title': 'Category popular with women videos by popular (page=1)',
+ },
+ 'playlist_count': 36,
+ }]
+
+
+class YouPornChannelIE(YouPornListBase):
+ IE_DESC = 'YouPorn channel, with sorting and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>channel)/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/channel/x-feeds/',
+ 'info_dict': {
+ 'id': 'x-feeds',
+ 'title': 'Channel X-Feeds videos',
+ },
+ 'playlist_mincount': 37,
+ }, {
+ 'note': 'Single page of full list (no filters here)',
+ 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
+ 'info_dict': {
+ 'id': 'x-feeds/duration/page=1',
+ 'title': 'Channel X-Feeds videos by duration (page=1)',
+ },
+ 'playlist_count': 24,
+ }]
+
+ @staticmethod
+ def _get_title_from_slug(title_slug):
+ return re.sub(r'_', ' ', title_slug).title()
+
+
+class YouPornCollectionIE(YouPornListBase):
+ IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>collection)s/videos/(?P<id>\d+)
+ (?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/collections/videos/33044251/',
+ 'info_dict': {
+ 'id': '33044251',
+ 'title': 'Collection Sexy Lips videos',
+ 'uploader': 'ph-littlewillyb',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'note': 'Single page of full list (no filters here)',
+ 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
+ 'info_dict': {
+ 'id': '33044251/time/page=1',
+ 'title': 'Collection Sexy Lips videos by time (page=1)',
+ 'uploader': 'ph-littlewillyb',
+ },
+ 'playlist_count': 20,
+ }]
+
+ def _real_extract(self, url):
+ pl_id = self._match_id(url)
+ html = self._download_webpage(url, pl_id)
+ playlist = super()._real_extract(url, html=html)
+ infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
+ 'collection-infos', html)) or '')
+ title, uploader = self._search_regex(
+ r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
+ infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
+ if title:
+ playlist.update({
+ 'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
+ 'uploader': uploader,
+ })
+
+ return playlist
+
+
+class YouPornTagIE(YouPornListBase):
+ IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ porn(?P<type>tag)s/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/porntags/austrian',
+ 'info_dict': {
+ 'id': 'austrian',
+ 'title': 'Tag austrian videos',
+ },
+ 'playlist_mincount': 33,
+ 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
+ }, {
+ 'note': 'Filtered paginated list with single page result',
+ 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
+ 'info_dict': {
+ 'id': 'austrian/duration/min_minutes=10',
+ 'title': 'Tag austrian videos by duration (min_minutes=10)',
+ },
+ 'playlist_mincount': 10,
+ # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
+ # or more, varying with number of ads; let's set max as 9x4
+ # NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
+ # 'playlist_maxcount': 32,
+ 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
+ }, {
+ 'note': 'Single page of full list',
+ 'url': 'https://www.youporn.com/porntags/austrian/?page=1',
+ 'info_dict': {
+ 'id': 'austrian/page=1',
+ 'title': 'Tag austrian videos (page=1)',
+ },
+ 'playlist_mincount': 32,
+ # 'playlist_maxcount': 34,
+ 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
+ }]
+
+ def _real_extract(self, url):
+ self.report_warning(
+ 'YouPorn tag pages are not correctly cached and '
+ 'often return incorrect results', only_once=True)
+ return super()._real_extract(url)
+
+
+class YouPornStarIE(YouPornListBase):
+ IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>pornstar)/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/pornstar/daynia/',
+ 'info_dict': {
+ 'id': 'daynia',
+ 'title': 'Pornstar Daynia videos',
+ 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
+ },
+ 'playlist_mincount': 40,
+ }, {
+ 'note': 'Single page of full list (no filters here)',
+ 'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
+ 'info_dict': {
+ 'id': 'daynia/page=1',
+ 'title': 'Pornstar Daynia videos (page=1)',
+ 'description': 're:.{180,}',
+ },
+ 'playlist_count': 26,
+ }]
+
+ @staticmethod
+ def _get_title_from_slug(title_slug):
+ return re.sub(r'_', ' ', title_slug).title()
+
+ def _real_extract(self, url):
+ pl_id = self._match_id(url)
+ html = self._download_webpage(url, pl_id)
+ playlist = super()._real_extract(url, html=html)
+ INFO_ELEMENT_RE = r'''(?x)
+ <div [^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
+ (?P<info>[\s\S]+?)(?:</div>\s*){6,}
+ '''
+
+ if infos := self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default=''):
+ infos = re.sub(
+ r'(?:\s*nl=nl)+\s*', ' ',
+ re.sub(r'(?u)\s+', ' ', clean_html(re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
+
+ return {
+ **playlist,
+ 'description': infos.strip() or None,
+ }
+
+
+class YouPornVideosIE(YouPornListBase):
+ IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?:(?P<id>browse)/)?
+ (?P<sort>(?(id)
+ (?:duration|rating|time|views)|
+ (?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
+ (?:[/#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/',
+ 'info_dict': {
+ 'id': 'youporn',
+ 'title': 'YouPorn videos',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/recommended',
+ 'info_dict': {
+ 'id': 'youporn/recommended',
+ 'title': 'YouPorn videos by recommended',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/top_rated',
+ 'info_dict': {
+ 'id': 'youporn/top_rated',
+ 'title': 'YouPorn videos by top rated',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/browse/time',
+ 'info_dict': {
+ 'id': 'browse/time',
+ 'title': 'YouPorn videos by time',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Filtered paginated list with single page result',
+ 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
+ 'info_dict': {
+ 'id': 'youporn/most_favorited/max_minutes=2/res=VR',
+ 'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
+ },
+ 'playlist_mincount': 10,
+ # 'playlist_maxcount': 28,
+ }, {
+ 'note': 'Filtered paginated list with several pages',
+ 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
+ 'info_dict': {
+ 'id': 'youporn/most_favorited/max_minutes=5/res=VR',
+ 'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
+ },
+ 'playlist_mincount': 45,
+ }, {
+ 'note': 'Single page of full list',
+ 'url': 'https://www.youporn.com/browse/time?page=1',
+ 'info_dict': {
+ 'id': 'browse/time/page=1',
+ 'title': 'YouPorn videos by time (page=1)',
+ },
+ 'playlist_count': 36,
+ }]
+
+ @staticmethod
+ def _get_title_from_slug(title_slug):
+ return 'YouPorn' if title_slug == 'browse' else title_slug
diff --git a/yt_dlp/extractor/yourporn.py b/yt_dlp/extractor/yourporn.py
deleted file mode 100644
index 38f42a9..0000000
--- a/yt_dlp/extractor/yourporn.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- parse_duration,
- urljoin,
-)
-
-
-class YourPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
- _TESTS = [{
- 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
- 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
- 'info_dict': {
- 'id': '57ffcb2e1179b',
- 'ext': 'mp4',
- 'title': 'md5:c9f43630bd968267672651ba905a7d35',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 165,
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- parts = self._parse_json(
- self._search_regex(
- r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
- group='data'),
- video_id)[video_id].split('/')
-
- num = 0
- for c in parts[6] + parts[7]:
- if c.isnumeric():
- num += int(c)
- parts[5] = compat_str(int(parts[5]) - num)
- parts[1] += '8'
- video_url = urljoin(url, '/'.join(parts))
-
- title = (self._search_regex(
- r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
- default=None) or self._og_search_description(webpage)).strip()
- thumbnail = self._og_search_thumbnail(webpage)
- duration = parse_duration(self._search_regex(
- r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
- default=None))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'age_limit': 18,
- 'ext': 'mp4',
- }
diff --git a/yt_dlp/extractor/yourupload.py b/yt_dlp/extractor/yourupload.py
deleted file mode 100644
index def6329..0000000
--- a/yt_dlp/extractor/yourupload.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from .common import InfoExtractor
-from ..utils import urljoin
-
-
-class YourUploadIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
- _TESTS = [{
- 'url': 'http://yourupload.com/watch/14i14h',
- 'md5': '5e2c63385454c557f97c4c4131a393cd',
- 'info_dict': {
- 'id': '14i14h',
- 'ext': 'mp4',
- 'title': 'BigBuckBunny_320x180.mp4',
- 'thumbnail': r're:^https?://.*\.jpe?g',
- }
- }, {
- 'url': 'http://www.yourupload.com/embed/14i14h',
- 'only_matching': True,
- }, {
- 'url': 'http://embed.yourupload.com/14i14h',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- embed_url = 'http://www.yourupload.com/embed/%s' % video_id
-
- webpage = self._download_webpage(embed_url, video_id)
-
- title = self._og_search_title(webpage)
- video_url = urljoin(embed_url, self._og_search_video_url(webpage))
- thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'http_headers': {
- 'Referer': embed_url,
- },
- }
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 33fd3b4..54da4e3 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2,7 +2,7 @@ import base64
import calendar
import collections
import copy
-import datetime
+import datetime as dt
import enum
import hashlib
import itertools
@@ -33,6 +33,7 @@ from ..utils import (
clean_html,
datetime_from_str,
dict_get,
+ filesize_from_tbr,
filter_dict,
float_or_none,
format_field,
@@ -55,6 +56,7 @@ from ..utils import (
str_to_int,
strftime_or_none,
traverse_obj,
+ try_call,
try_get,
unescapeHTML,
unified_strdate,
@@ -238,6 +240,16 @@ INNERTUBE_CLIENTS = {
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
},
+ # This client has pre-merged video+audio 720p/1080p streams
+ 'mediaconnect': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MEDIA_CONNECT_FRONTEND',
+ 'clientVersion': '0.1',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 95
+ },
}
@@ -922,10 +934,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _parse_time_text(self, text):
if not text:
return
- dt = self.extract_relative_time(text)
+ dt_ = self.extract_relative_time(text)
timestamp = None
- if isinstance(dt, datetime.datetime):
- timestamp = calendar.timegm(dt.timetuple())
+ if isinstance(dt_, dt.datetime):
+ timestamp = calendar.timegm(dt_.timetuple())
if timestamp is None:
timestamp = (
@@ -1169,7 +1181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
- _formats = {
+ _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
@@ -1313,6 +1325,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
+ 'timestamp': 1349198244,
}
},
{
@@ -1356,6 +1369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
+ 'timestamp': 1349198244,
},
'params': {
'skip_download': True,
@@ -1442,6 +1456,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1401991663,
},
},
{
@@ -1501,6 +1516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Projekt Melody',
'uploader_url': 'https://www.youtube.com/@ProjektMelody',
'uploader_id': '@ProjektMelody',
+ 'timestamp': 1577508724,
},
},
{
@@ -1606,6 +1622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@Olympics',
'uploader_id': '@Olympics',
'channel_is_verified': True,
+ 'timestamp': 1440707674,
},
'params': {
'skip_download': 'requires avconv',
@@ -1639,6 +1656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': '孫ᄋᄅ',
'uploader_url': 'https://www.youtube.com/@AllenMeow',
'uploader_id': '@AllenMeow',
+ 'timestamp': 1299776999,
},
},
# url_encoded_fmt_stream_map is empty string
@@ -1782,6 +1800,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
}],
'params': {'skip_download': True},
+ 'skip': 'Not multifeed anymore',
},
{
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
@@ -1890,6 +1909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'The Berkman Klein Center for Internet & Society',
'uploader_id': '@BKCHarvard',
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
+ 'timestamp': 1422422076,
},
'params': {
'skip_download': True,
@@ -1925,6 +1945,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@BernieSanders',
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1447987198,
},
'params': {
'skip_download': True,
@@ -1988,6 +2009,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Vsauce',
'comment_count': int,
'channel_is_verified': True,
+ 'timestamp': 1484761047,
},
'params': {
'skip_download': True,
@@ -2143,6 +2165,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'l\'Or Vert asbl',
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
'uploader_id': '@ElevageOrVert',
+ 'timestamp': 1497343210,
},
'params': {
'skip_download': True,
@@ -2181,6 +2204,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Csharp-video-tutorialsBlogspot',
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1377976349,
},
'params': {
'skip_download': True,
@@ -2263,6 +2287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@CBSMornings',
'comment_count': int,
'channel_is_verified': True,
+ 'timestamp': 1405513526,
}
},
{
@@ -2280,7 +2305,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': int,
'channel': 'Walk around Japan',
'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
- 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
+ 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
'age_limit': 0,
'availability': 'public',
'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
@@ -2290,6 +2315,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Walk around Japan',
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
'uploader_id': '@walkaroundjapan7124',
+ 'timestamp': 1605884416,
},
'params': {
'skip_download': True,
@@ -2341,6 +2367,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '17', # 3gp format available on android
'extractor_args': {'youtube': {'player_client': ['android']}},
},
+ 'skip': 'android client broken',
},
{
# Skip download of additional client configs (remix client config in this case)
@@ -2384,6 +2411,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1395685455,
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2413,38 +2441,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
+ 'timestamp': 1641170939,
}
}, {
- # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
- 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
- 'info_dict': {
- 'id': '2NUZ8W2llS4',
- 'ext': 'mp4',
- 'title': 'The NP that test your phone performance 🙂',
- 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
- 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
- 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
- 'duration': 21,
- 'view_count': int,
- 'age_limit': 0,
- 'categories': ['Gaming'],
- 'tags': 'count:23',
- 'playable_in_embed': True,
- 'live_status': 'not_live',
- 'upload_date': '20220102',
- 'like_count': int,
- 'availability': 'public',
- 'channel': 'Leon Nguyen',
- 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
- 'comment_count': int,
- 'channel_follower_count': int,
- 'uploader': 'Leon Nguyen',
- 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
- 'uploader_id': '@LeonNguyen',
- 'heatmap': 'count:100',
- },
- 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
- }, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)
'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
'info_dict': {
@@ -2475,38 +2474,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1641172509,
}
},
- { # continuous livestream. Microformat upload date should be preferred.
- # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
- 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
+ { # continuous livestream.
+ # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
+ 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
'info_dict': {
- 'id': 'kgx4WGK0oNU',
- 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'id': 'jfKfPfyJRdk',
'ext': 'mp4',
- 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
- 'availability': 'public',
+ 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
+ 'like_count': int,
+ 'uploader': 'Lofi Girl',
+ 'categories': ['Music'],
+ 'concurrent_view_count': int,
+ 'playable_in_embed': True,
+ 'timestamp': 1657627949,
+ 'release_date': '20220712',
+ 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
+ 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
'age_limit': 0,
- 'release_timestamp': 1637975704,
- 'upload_date': '20210619',
- 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
- 'live_status': 'is_live',
- 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
- 'channel': 'Abao in Tokyo',
+ 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
+ 'release_timestamp': 1657641570,
+ 'uploader_url': 'https://www.youtube.com/@LofiGirl',
'channel_follower_count': int,
- 'release_date': '20211127',
- 'tags': 'count:39',
- 'categories': ['People & Blogs'],
- 'like_count': int,
+ 'channel_is_verified': True,
+ 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
'view_count': int,
- 'playable_in_embed': True,
- 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
- 'concurrent_view_count': int,
- 'uploader': 'Abao in Tokyo',
- 'uploader_url': 'https://www.youtube.com/@abaointokyo',
- 'uploader_id': '@abaointokyo',
+ 'live_status': 'is_live',
+ 'tags': 'count:32',
+ 'channel': 'Lofi Girl',
+ 'availability': 'public',
+ 'upload_date': '20220712',
+ 'uploader_id': '@LofiGirl',
},
- 'params': {'skip_download': True}
+ 'params': {'skip_download': True},
}, {
'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
'info_dict': {
@@ -2532,6 +2534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@lesmiscore',
'uploader': 'Lesmiscore',
'uploader_url': 'https://www.youtube.com/@lesmiscore',
+ 'timestamp': 1648005313,
}
}, {
# Prefer primary title+description language metadata by default
@@ -2559,6 +2562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
+ 'timestamp': 1662677394,
},
'params': {'skip_download': True}
}, {
@@ -2572,7 +2576,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 5,
'live_status': 'not_live',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
- 'upload_date': '20220728',
+ 'upload_date': '20220729',
'view_count': int,
'categories': ['People & Blogs'],
'thumbnail': r're:^https?://.*\.jpg',
@@ -2585,6 +2589,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
+ 'timestamp': 1659073275,
+ 'like_count': int,
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],
@@ -2650,6 +2656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Projekt Melody',
'uploader_id': '@ProjektMelody',
'uploader_url': 'https://www.youtube.com/@ProjektMelody',
+ 'timestamp': 1577508724,
},
'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
},
@@ -2684,6 +2691,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@sana_natori',
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1671798112,
},
},
{
@@ -2718,7 +2726,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'heatmap': 'count:100',
},
'params': {
- 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
+ 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
},
},
]
@@ -2753,6 +2761,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
+ 'timestamp': 1211825920,
},
'params': {
'skip_download': True,
@@ -3305,7 +3314,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'value': ('intensityScoreNormalized', {float_or_none}),
})) or None
- def _extract_comment(self, comment_renderer, parent=None):
+ def _extract_comment(self, entities, parent=None):
+ comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
+ if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
+ return
+
+ toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
+ time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
+
+ return {
+ 'id': comment_id,
+ 'parent': parent or 'root',
+ **traverse_obj(comment_entity_payload, {
+ 'text': ('properties', 'content', 'content', {str}),
+ 'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
+ 'author_id': ('author', 'channelId', {self.ucid_or_none}),
+ 'author': ('author', 'displayName', {str}),
+ 'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
+ 'author_is_uploader': ('author', 'isCreator', {bool}),
+ 'author_is_verified': ('author', 'isVerified', {bool}),
+ 'author_url': ('author', 'channelCommand', 'innertubeCommand', (
+ ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')
+ ), {lambda x: urljoin('https://www.youtube.com', x)}),
+ }, get_all=False),
+ 'is_favorited': (None if toolbar_entity_payload is None else
+ toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
+ '_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate.
+ 'timestamp': self._parse_time_text(time_text),
+ }
+
+ def _extract_comment_old(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
if not comment_id:
return
@@ -3386,21 +3424,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
return _continuation
- def extract_thread(contents):
+ def extract_thread(contents, entity_payloads):
if not parent:
tracker['current_page_thread'] = 0
for content in contents:
if not parent and tracker['total_parent_comments'] >= max_parents:
yield
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
- comment_renderer = get_first(
- (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
- expected_type=dict, default={})
- comment = self._extract_comment(comment_renderer, parent)
+ # old comment format
+ if not entity_payloads:
+ comment_renderer = get_first(
+ (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
+ expected_type=dict, default={})
+
+ comment = self._extract_comment_old(comment_renderer, parent)
+
+ # new comment format
+ else:
+ view_model = (
+ traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
+ or traverse_obj(content, ('commentViewModel', {dict})))
+ comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
+ if not comment_keys:
+ continue
+ entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
+ comment = self._extract_comment(entities, parent)
+ if comment:
+ comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
+
if not comment:
continue
comment_id = comment['id']
+
if comment.get('is_pinned'):
tracker['pinned_comment_ids'].add(comment_id)
# Sometimes YouTube may break and give us infinite looping comments.
@@ -3493,7 +3549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
check_get_keys = None
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
check_get_keys = [[*continuation_items_path, ..., (
- 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
+ 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
try:
response = self._extract_response(
item_id=None, query=continuation,
@@ -3517,6 +3573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise
is_forced_continuation = False
continuation = None
+ mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
if is_first_continuation:
continuation = extract_header(continuation_items)
@@ -3525,7 +3582,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
continue
- for entry in extract_thread(continuation_items):
+ for entry in extract_thread(continuation_items, mutations):
if not entry:
return
yield entry
@@ -3602,8 +3659,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
yt_query = {
'videoId': video_id,
}
- if _split_innertube_client(client)[0] == 'android':
- yt_query['params'] = 'CgIQBg=='
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
if pp_arg:
@@ -3619,19 +3674,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
- default = ['ios', 'android', 'web']
+ android_clients = []
+ default = ['ios', 'web']
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
for client in self._configuration_arg('player_client'):
- if client in allowed_clients:
- requested_clients.append(client)
- elif client == 'default':
+ if client == 'default':
requested_clients.extend(default)
elif client == 'all':
requested_clients.extend(allowed_clients)
- else:
+ elif client not in allowed_clients:
self.report_warning(f'Skipping unsupported client {client}')
+ elif client.startswith('android'):
+ android_clients.append(client)
+ else:
+ requested_clients.append(client)
+ # Force deprioritization of broken Android clients for format de-duplication
+ requested_clients.extend(android_clients)
if not requested_clients:
requested_clients = default
@@ -3839,16 +3899,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
+ format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
# Make sure to avoid false positives with small duration differences.
# E.g. __2ABJjxzNo, ySuUZEjARPY
- is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
+ is_damaged = try_call(lambda: format_duration < duration // 2)
if is_damaged:
self.report_warning(
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+ # Android client formats are broken due to integrity check enforcement
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
+ is_broken = client_name and client_name.startswith(short_client_name('android'))
+ if is_broken:
+ self.report_warning(
+ f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
+ 'They will be deprioritized', only_once=True)
+
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
dct = {
@@ -3861,7 +3930,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
- throttled and 'THROTTLED', is_damaged and 'DAMAGED',
+ throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
@@ -3873,13 +3942,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
'has_drm': bool(fmt.get('drmFamilies')),
'tbr': tbr,
+ 'filesize_approx': filesize_from_tbr(tbr, format_duration),
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else '') or None,
'language_preference': language_preference,
- # Strictly de-prioritize damaged and 3gp formats
- 'preference': -10 if is_damaged else -2 if itag == '17' else None,
+ # Strictly de-prioritize broken, damaged and 3gp formats
+ 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -4548,23 +4618,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})
+
+ # We only want timestamp IF it has time precision AND a timezone
+ # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
+ timestamp = (
+ parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
+ or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
+ )
+ upload_date = (
+ dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
+ (
+ unified_strdate(get_first(microformats, 'uploadDate'))
+ or unified_strdate(search_meta('uploadDate'))
+ ))
+
+ # In the case we cannot get the timestamp:
# The upload date for scheduled, live and past live streams / premieres in microformats
# may be different from the stream date. Although not in UTC, we will prefer it in this case.
# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
- upload_date = (
- unified_strdate(get_first(microformats, 'uploadDate'))
- or unified_strdate(search_meta('uploadDate')))
- if not upload_date or (
- live_status in ('not_live', None)
- and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
- ):
+ if not upload_date or (not timestamp and live_status in ('not_live', None)):
+ # this should be in UTC, as configured in the cookie/client context
upload_date = strftime_or_none(
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
+
info['upload_date'] = upload_date
+ info['timestamp'] = timestamp
if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
# Newly uploaded videos' HLS formats are potentially problematic and need to be checked
- upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)
+ upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
if upload_datetime >= datetime_from_str('today-2days'):
for fmt in info['formats']:
if fmt.get('protocol') == 'm3u8_native':
@@ -6965,7 +7047,7 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
IE_DESC = 'YouTube search'
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
- _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
+ _SEARCH_PARAMS = 'EgIQAfABAQ==' # Videos only
_TESTS = [{
'url': 'ytsearch5:youtube-dl test video',
'playlist_count': 5,
@@ -6973,6 +7055,14 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
'id': 'youtube-dl test video',
'title': 'youtube-dl test video',
}
+ }, {
+ 'note': 'Suicide/self-harm search warning',
+ 'url': 'ytsearch1:i hate myself and i wanna die',
+ 'playlist_count': 1,
+ 'info_dict': {
+ 'id': 'i hate myself and i wanna die',
+ 'title': 'i hate myself and i wanna die',
+ }
}]
@@ -6980,7 +7070,7 @@ class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube search, newest videos first'
- _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
+ _SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date
_TESTS = [{
'url': 'ytsearchdate5:youtube-dl test video',
'playlist_count': 5,
diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py
index 88f526b..2a12aa5 100644
--- a/yt_dlp/extractor/zapiks.py
+++ b/yt_dlp/extractor/zapiks.py
@@ -2,11 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
+ int_or_none,
parse_duration,
parse_iso8601,
- xpath_with_ns,
xpath_text,
- int_or_none,
+ xpath_with_ns,
)
diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py
index 6bd9ea0..5cc9c5f 100644
--- a/yt_dlp/extractor/zattoo.py
+++ b/yt_dlp/extractor/zattoo.py
@@ -1,5 +1,5 @@
import re
-from uuid import uuid4
+import uuid
from .common import InfoExtractor
from ..compat import compat_str
@@ -53,7 +53,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
self._request_webpage(
'%s/zapi/v3/session/hello' % self._host_url(), None,
'Opening session', data=urlencode_postdata({
- 'uuid': compat_str(uuid4()),
+ 'uuid': compat_str(uuid.uuid4()),
'lang': 'en',
'app_version': '1.8.2',
'format': 'json',
diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py
index c24b338..18b22a5 100644
--- a/yt_dlp/extractor/zhihu.py
+++ b/yt_dlp/extractor/zhihu.py
@@ -1,5 +1,5 @@
from .common import InfoExtractor
-from ..utils import format_field, float_or_none, int_or_none
+from ..utils import float_or_none, format_field, int_or_none
class ZhihuIE(InfoExtractor):
diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
index ff5eac8..909a7a3 100644
--- a/yt_dlp/extractor/zingmp3.py
+++ b/yt_dlp/extractor/zingmp3.py
@@ -10,8 +10,8 @@ from ..utils import (
int_or_none,
join_nonempty,
try_call,
+ url_or_none,
urljoin,
- url_or_none
)
from ..utils.traversal import traverse_obj
diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py
index 2f3b4c4..8d3156d 100644
--- a/yt_dlp/extractor/zype.py
+++ b/yt_dlp/extractor/zype.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
int_or_none,
js_to_json,
parse_iso8601,
diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py
index acadc01..356712c 100644
--- a/yt_dlp/networking/__init__.py
+++ b/yt_dlp/networking/__init__.py
@@ -28,3 +28,10 @@ except ImportError:
pass
except Exception as e:
warnings.warn(f'Failed to import "websockets" request handler: {e}' + bug_reports_message())
+
+try:
+ from . import _curlcffi # noqa: F401
+except ImportError:
+ pass
+except Exception as e:
+ warnings.warn(f'Failed to import "curl_cffi" request handler: {e}' + bug_reports_message())
diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py
new file mode 100644
index 0000000..f2df399
--- /dev/null
+++ b/yt_dlp/networking/_curlcffi.py
@@ -0,0 +1,241 @@
+from __future__ import annotations
+
+import io
+import math
+import urllib.parse
+
+from ._helper import InstanceStoreMixin, select_proxy
+from .common import (
+ Features,
+ Request,
+ Response,
+ register_preference,
+ register_rh,
+)
+from .exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ IncompleteRead,
+ ProxyError,
+ SSLError,
+ TransportError,
+)
+from .impersonate import ImpersonateRequestHandler, ImpersonateTarget
+from ..dependencies import curl_cffi, certifi
+from ..utils import int_or_none
+
+if curl_cffi is None:
+ raise ImportError('curl_cffi is not installed')
+
+curl_cffi_version = tuple(int_or_none(x, default=0) for x in curl_cffi.__version__.split('.'))
+
+if curl_cffi_version != (0, 5, 10):
+ curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
+ raise ImportError('Only curl_cffi 0.5.10 is supported')
+
+import curl_cffi.requests
+from curl_cffi.const import CurlECode, CurlOpt
+
+
+class CurlCFFIResponseReader(io.IOBase):
+ def __init__(self, response: curl_cffi.requests.Response):
+ self._response = response
+ self._iterator = response.iter_content()
+ self._buffer = b''
+ self.bytes_read = 0
+
+ def readable(self):
+ return True
+
+ def read(self, size=None):
+ exception_raised = True
+ try:
+ while self._iterator and (size is None or len(self._buffer) < size):
+ chunk = next(self._iterator, None)
+ if chunk is None:
+ self._iterator = None
+ break
+ self._buffer += chunk
+ self.bytes_read += len(chunk)
+
+ if size is None:
+ size = len(self._buffer)
+ data = self._buffer[:size]
+ self._buffer = self._buffer[size:]
+
+ # "free" the curl instance if the response is fully read.
+ # curl_cffi doesn't do this automatically and only allows one open response per thread
+ if not self._iterator and not self._buffer:
+ self.close()
+ exception_raised = False
+ return data
+ finally:
+ if exception_raised:
+ self.close()
+
+ def close(self):
+ if not self.closed:
+ self._response.close()
+ self._buffer = b''
+ super().close()
+
+
+class CurlCFFIResponseAdapter(Response):
+ fp: CurlCFFIResponseReader
+
+ def __init__(self, response: curl_cffi.requests.Response):
+ super().__init__(
+ fp=CurlCFFIResponseReader(response),
+ headers=response.headers,
+ url=response.url,
+ status=response.status_code)
+
+ def read(self, amt=None):
+ try:
+ return self.fp.read(amt)
+ except curl_cffi.requests.errors.RequestsError as e:
+ if e.code == CurlECode.PARTIAL_FILE:
+ content_length = int_or_none(e.response.headers.get('Content-Length'))
+ raise IncompleteRead(
+ partial=self.fp.bytes_read,
+ expected=content_length - self.fp.bytes_read if content_length is not None else None,
+ cause=e) from e
+ raise TransportError(cause=e) from e
+
+
+@register_rh
+class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
+ RH_NAME = 'curl_cffi'
+ _SUPPORTED_URL_SCHEMES = ('http', 'https')
+ _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
+ _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
+ _SUPPORTED_IMPERSONATE_TARGET_MAP = {
+ ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110,
+ ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107,
+ ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104,
+ ImpersonateTarget('chrome', '101', 'windows', '10'): curl_cffi.requests.BrowserType.chrome101,
+ ImpersonateTarget('chrome', '100', 'windows', '10'): curl_cffi.requests.BrowserType.chrome100,
+ ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99,
+ ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101,
+ ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99,
+ ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5,
+ ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3,
+ ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android,
+ }
+
+ def _create_instance(self, cookiejar=None):
+ return curl_cffi.requests.Session(cookies=cookiejar)
+
+ def _check_extensions(self, extensions):
+ super()._check_extensions(extensions)
+ extensions.pop('impersonate', None)
+ extensions.pop('cookiejar', None)
+ extensions.pop('timeout', None)
+
+ def send(self, request: Request) -> Response:
+ target = self._get_request_target(request)
+ try:
+ response = super().send(request)
+ except HTTPError as e:
+ e.response.extensions['impersonate'] = target
+ raise
+ response.extensions['impersonate'] = target
+ return response
+
+ def _send(self, request: Request):
+ max_redirects_exceeded = False
+ session: curl_cffi.requests.Session = self._get_instance(
+ cookiejar=self._get_cookiejar(request) if 'cookie' not in request.headers else None)
+
+ if self.verbose:
+ session.curl.setopt(CurlOpt.VERBOSE, 1)
+
+ proxies = self._get_proxies(request)
+ if 'no' in proxies:
+ session.curl.setopt(CurlOpt.NOPROXY, proxies['no'])
+ proxies.pop('no', None)
+
+ # curl doesn't support per protocol proxies, so we select the one that matches the request protocol
+ proxy = select_proxy(request.url, proxies=proxies)
+ if proxy:
+ session.curl.setopt(CurlOpt.PROXY, proxy)
+ scheme = urllib.parse.urlparse(request.url).scheme.lower()
+ if scheme != 'http':
+ # Enable HTTP CONNECT for HTTPS urls.
+ # Don't use CONNECT for http for compatibility with urllib behaviour.
+ # See: https://curl.se/libcurl/c/CURLOPT_HTTPPROXYTUNNEL.html
+ session.curl.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)
+
+ # curl_cffi does not currently set these for proxies
+ session.curl.setopt(CurlOpt.PROXY_CAINFO, certifi.where())
+
+ if not self.verify:
+ session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYPEER, 0)
+ session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYHOST, 0)
+
+ headers = self._get_impersonate_headers(request)
+
+ if self._client_cert:
+ session.curl.setopt(CurlOpt.SSLCERT, self._client_cert['client_certificate'])
+ client_certificate_key = self._client_cert.get('client_certificate_key')
+ client_certificate_password = self._client_cert.get('client_certificate_password')
+ if client_certificate_key:
+ session.curl.setopt(CurlOpt.SSLKEY, client_certificate_key)
+ if client_certificate_password:
+ session.curl.setopt(CurlOpt.KEYPASSWD, client_certificate_password)
+
+ timeout = self._calculate_timeout(request)
+
+ # set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1]
+ # curl_cffi does not currently do this. [2]
+ # Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3]
+ # [1] https://unix.stackexchange.com/a/305311
+ # [2] https://github.com/yifeikong/curl_cffi/issues/156
+ # [3] https://curl.se/libcurl/c/CURLOPT_LOW_SPEED_TIME.html
+ session.curl.setopt(CurlOpt.LOW_SPEED_LIMIT, 1) # 1 byte per second
+ session.curl.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(timeout))
+
+ try:
+ curl_response = session.request(
+ method=request.method,
+ url=request.url,
+ headers=headers,
+ data=request.data,
+ verify=self.verify,
+ max_redirects=5,
+ timeout=timeout,
+ impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get(
+ self._get_request_target(request)),
+ interface=self.source_address,
+ stream=True
+ )
+ except curl_cffi.requests.errors.RequestsError as e:
+ if e.code == CurlECode.PEER_FAILED_VERIFICATION:
+ raise CertificateVerifyError(cause=e) from e
+
+ elif e.code == CurlECode.SSL_CONNECT_ERROR:
+ raise SSLError(cause=e) from e
+
+ elif e.code == CurlECode.TOO_MANY_REDIRECTS:
+ max_redirects_exceeded = True
+ curl_response = e.response
+
+ elif (
+ e.code == CurlECode.PROXY
+ or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e))
+ ):
+ raise ProxyError(cause=e) from e
+ else:
+ raise TransportError(cause=e) from e
+
+ response = CurlCFFIResponseAdapter(curl_response)
+
+ if not 200 <= response.status < 300:
+ raise HTTPError(response, redirect_loop=max_redirects_exceeded)
+
+ return response
+
+
+@register_preference(CurlCFFIRH)
+def curl_cffi_preference(rh, request):
+ return -100
diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py
index d79dd79..8e678b2 100644
--- a/yt_dlp/networking/_helper.py
+++ b/yt_dlp/networking/_helper.py
@@ -2,6 +2,7 @@ from __future__ import annotations
import contextlib
import functools
+import os
import socket
import ssl
import sys
@@ -121,6 +122,9 @@ def make_ssl_context(
context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
context.check_hostname = verify
context.verify_mode = ssl.CERT_REQUIRED if verify else ssl.CERT_NONE
+ # Honor SSLKEYLOGFILE for TLS key logging (needs OpenSSL 1.1.1+ and Python 3.8+)
+ if hasattr(context, 'keylog_filename'):
+ context.keylog_filename = os.environ.get('SSLKEYLOGFILE') or None
# Some servers may reject requests if ALPN extension is not sent. See:
# https://github.com/python/cpython/issues/85140
diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py
index 6545028..6397a2c 100644
--- a/yt_dlp/networking/_requests.py
+++ b/yt_dlp/networking/_requests.py
@@ -28,6 +28,7 @@ import requests.adapters
import requests.utils
import urllib3.connection
import urllib3.exceptions
+import urllib3.util
from ._helper import (
InstanceStoreMixin,
@@ -180,10 +181,25 @@ class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)
+ # Skip `requests` internal verification; we use our own SSLContext
+ # requests 2.31.0+
def cert_verify(*args, **kwargs):
- # lean on SSLContext for cert verification
pass
+ # requests 2.31.0-2.32.1
+ def _get_connection(self, request, *_, proxies=None, **__):
+ return self.get_connection(request.url, proxies)
+
+ # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
+ def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
+ url = urllib3.util.parse_url(request.url).url
+
+ manager = self.poolmanager
+ if proxy := select_proxy(url, proxies):
+ manager = self.proxy_manager_for(proxy)
+
+ return manager.connection_from_url(url)
+
class RequestsSession(requests.sessions.Session):
"""
@@ -307,8 +323,7 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
max_redirects_exceeded = False
- session = self._get_instance(
- cookiejar=request.extensions.get('cookiejar') or self.cookiejar)
+ session = self._get_instance(cookiejar=self._get_cookiejar(request))
try:
requests_res = session.request(
@@ -316,8 +331,8 @@ class RequestsRH(RequestHandler, InstanceStoreMixin):
url=request.url,
data=request.data,
headers=headers,
- timeout=float(request.extensions.get('timeout') or self.timeout),
- proxies=request.proxies or self.proxies,
+ timeout=self._calculate_timeout(request),
+ proxies=self._get_proxies(request),
allow_redirects=True,
stream=True
)
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py
index cb4dae3..ff110dc 100644
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -389,11 +389,11 @@ class UrllibRH(RequestHandler, InstanceStoreMixin):
)
opener = self._get_instance(
- proxies=request.proxies or self.proxies,
- cookiejar=request.extensions.get('cookiejar') or self.cookiejar
+ proxies=self._get_proxies(request),
+ cookiejar=self._get_cookiejar(request)
)
try:
- res = opener.open(urllib_req, timeout=float(request.extensions.get('timeout') or self.timeout))
+ res = opener.open(urllib_req, timeout=self._calculate_timeout(request))
except urllib.error.HTTPError as e:
if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
# Prevent file object from being closed when urllib.error.HTTPError is destroyed.
diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py
index 1597932..6e235b0 100644
--- a/yt_dlp/networking/_websockets.py
+++ b/yt_dlp/networking/_websockets.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import contextlib
import io
import logging
import ssl
@@ -38,27 +39,40 @@ if websockets_version < (12, 0):
import websockets.sync.client
from websockets.uri import parse_uri
+# In websockets Connection, recv_exc and recv_events_exc are defined
+# after the recv events handler thread is started [1].
+# On our CI using PyPy, in some cases a race condition may occur
+# where the recv events handler thread tries to use these attributes before they are defined [2].
+# 1: https://github.com/python-websockets/websockets/blame/de768cf65e7e2b1a3b67854fb9e08816a5ff7050/src/websockets/sync/connection.py#L93
+# 2: "AttributeError: 'ClientConnection' object has no attribute 'recv_events_exc'. Did you mean: 'recv_events'?"
+import websockets.sync.connection # isort: split
+with contextlib.suppress(Exception):
+ # > 12.0
+ websockets.sync.connection.Connection.recv_exc = None
+ # 12.0
+ websockets.sync.connection.Connection.recv_events_exc = None
+
class WebsocketsResponseAdapter(WebSocketResponse):
- def __init__(self, wsw: websockets.sync.client.ClientConnection, url):
+ def __init__(self, ws: websockets.sync.client.ClientConnection, url):
super().__init__(
- fp=io.BytesIO(wsw.response.body or b''),
+ fp=io.BytesIO(ws.response.body or b''),
url=url,
- headers=wsw.response.headers,
- status=wsw.response.status_code,
- reason=wsw.response.reason_phrase,
+ headers=ws.response.headers,
+ status=ws.response.status_code,
+ reason=ws.response.reason_phrase,
)
- self.wsw = wsw
+ self._ws = ws
def close(self):
- self.wsw.close()
+ self._ws.close()
super().close()
def send(self, message):
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
try:
- return self.wsw.send(message)
+ return self._ws.send(message)
except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
raise TransportError(cause=e) from e
except SocksProxyError as e:
@@ -69,7 +83,7 @@ class WebsocketsResponseAdapter(WebSocketResponse):
def recv(self):
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
try:
- return self.wsw.recv()
+ return self._ws.recv()
except SocksProxyError as e:
raise ProxyError(cause=e) from e
except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
@@ -112,10 +126,10 @@ class WebsocketsRH(WebSocketRequestHandler):
logging.getLogger(name).removeHandler(handler)
def _send(self, request):
- timeout = float(request.extensions.get('timeout') or self.timeout)
+ timeout = self._calculate_timeout(request)
headers = self._merge_headers(request.headers)
if 'cookie' not in headers:
- cookiejar = request.extensions.get('cookiejar') or self.cookiejar
+ cookiejar = self._get_cookiejar(request)
cookie_header = cookiejar.get_cookie_header(request.url)
if cookie_header:
headers['cookie'] = cookie_header
@@ -125,7 +139,7 @@ class WebsocketsRH(WebSocketRequestHandler):
'source_address': (self.source_address, 0) if self.source_address else None,
'timeout': timeout
}
- proxy = select_proxy(request.url, request.proxies or self.proxies or {})
+ proxy = select_proxy(request.url, self._get_proxies(request))
try:
if proxy:
socks_proxy_options = make_socks_proxy_opts(proxy)
diff --git a/yt_dlp/networking/common.py b/yt_dlp/networking/common.py
index 39442ba..d473e16 100644
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@@ -31,6 +31,8 @@ from ..utils import (
)
from ..utils.networking import HTTPHeaderDict, normalize_url
+DEFAULT_TIMEOUT = 20
+
def register_preference(*handlers: type[RequestHandler]):
assert all(issubclass(handler, RequestHandler) for handler in handlers)
@@ -235,7 +237,7 @@ class RequestHandler(abc.ABC):
self._logger = logger
self.headers = headers or {}
self.cookiejar = cookiejar if cookiejar is not None else YoutubeDLCookieJar()
- self.timeout = float(timeout or 20)
+ self.timeout = float(timeout or DEFAULT_TIMEOUT)
self.proxies = proxies or {}
self.source_address = source_address
self.verbose = verbose
@@ -256,6 +258,15 @@ class RequestHandler(abc.ABC):
def _merge_headers(self, request_headers):
return HTTPHeaderDict(self.headers, request_headers)
+ def _calculate_timeout(self, request):
+ return float(request.extensions.get('timeout') or self.timeout)
+
+ def _get_cookiejar(self, request):
+ return request.extensions.get('cookiejar') or self.cookiejar
+
+ def _get_proxies(self, request):
+ return (request.proxies or self.proxies).copy()
+
def _check_url_scheme(self, request: Request):
scheme = urllib.parse.urlparse(request.url).scheme.lower()
if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
@@ -454,9 +465,10 @@ class Request:
else:
raise TypeError('headers must be a mapping')
- def update(self, url=None, data=None, headers=None, query=None):
+ def update(self, url=None, data=None, headers=None, query=None, extensions=None):
self.data = data if data is not None else self.data
self.headers.update(headers or {})
+ self.extensions.update(extensions or {})
self.url = update_url_query(url or self.url, query or {})
def copy(self):
@@ -487,15 +499,18 @@ class Response(io.IOBase):
@param headers: response headers.
@param status: Response HTTP status code. Default is 200 OK.
@param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
+ @param extensions: Dictionary of handler-specific response extensions.
"""
def __init__(
self,
- fp: typing.IO,
+ fp: io.IOBase,
url: str,
headers: Mapping[str, str],
status: int = 200,
- reason: str = None):
+ reason: str = None,
+ extensions: dict = None
+ ):
self.fp = fp
self.headers = Message()
@@ -507,6 +522,7 @@ class Response(io.IOBase):
self.reason = reason or HTTPStatus(status).phrase
except ValueError:
self.reason = None
+ self.extensions = extensions or {}
def readable(self):
return self.fp.readable()
diff --git a/yt_dlp/networking/impersonate.py b/yt_dlp/networking/impersonate.py
new file mode 100644
index 0000000..ca66180
--- /dev/null
+++ b/yt_dlp/networking/impersonate.py
@@ -0,0 +1,141 @@
+from __future__ import annotations
+
+import re
+from abc import ABC
+from dataclasses import dataclass
+from typing import Any
+
+from .common import RequestHandler, register_preference
+from .exceptions import UnsupportedRequest
+from ..compat.types import NoneType
+from ..utils import classproperty, join_nonempty
+from ..utils.networking import std_headers
+
+
+@dataclass(order=True, frozen=True)
+class ImpersonateTarget:
+ """
+ A target for browser impersonation.
+
+ Parameters:
+ @param client: the client to impersonate
+ @param version: the client version to impersonate
+ @param os: the client OS to impersonate
+ @param os_version: the client OS version to impersonate
+
+ Note: None matches any value for that field.
+
+ """
+ client: str | None = None
+ version: str | None = None
+ os: str | None = None
+ os_version: str | None = None
+
+ def __post_init__(self):
+ if self.version and not self.client:
+ raise ValueError('client is required if version is set')
+ if self.os_version and not self.os:
+ raise ValueError('os is required if os_version is set')
+
+ def __contains__(self, target: ImpersonateTarget):
+ if not isinstance(target, ImpersonateTarget):
+ return False
+ return (
+ (self.client is None or target.client is None or self.client == target.client)
+ and (self.version is None or target.version is None or self.version == target.version)
+ and (self.os is None or target.os is None or self.os == target.os)
+ and (self.os_version is None or target.os_version is None or self.os_version == target.os_version)
+ )
+
+ def __str__(self):
+ return f'{join_nonempty(self.client, self.version)}:{join_nonempty(self.os, self.os_version)}'.rstrip(':')
+
+ @classmethod
+ def from_str(cls, target: str):
+ mobj = re.fullmatch(r'(?:(?P<client>[^:-]+)(?:-(?P<version>[^:-]+))?)?(?::(?:(?P<os>[^:-]+)(?:-(?P<os_version>[^:-]+))?)?)?', target)
+ if not mobj:
+ raise ValueError(f'Invalid impersonate target "{target}"')
+ return cls(**mobj.groupdict())
+
+
+class ImpersonateRequestHandler(RequestHandler, ABC):
+ """
+ Base class for request handlers that support browser impersonation.
+
+ This provides a method for checking the validity of the impersonate extension,
+ which can be used in _check_extensions.
+
+ Impersonate targets consist of a client, version, os and os_version.
+ See the ImpersonateTarget class for more details.
+
+ The following may be defined:
+ - `_SUPPORTED_IMPERSONATE_TARGET_MAP`: a dict mapping supported targets to a custom object.
+ Any Request with an impersonate target that matches no key in this dict will raise an UnsupportedRequest.
+ Leave it empty to disable this check.
+ Note: Entries are in order of preference
+
+ Parameters:
+ @param impersonate: the default impersonate target to use for requests.
+ Set to None to disable impersonation.
+ """
+ _SUPPORTED_IMPERSONATE_TARGET_MAP: dict[ImpersonateTarget, Any] = {}
+
+ def __init__(self, *, impersonate: ImpersonateTarget = None, **kwargs):
+ super().__init__(**kwargs)
+ self.impersonate = impersonate
+
+ def _check_impersonate_target(self, target: ImpersonateTarget):
+ assert isinstance(target, (ImpersonateTarget, NoneType))
+ if target is None or not self.supported_targets:
+ return
+ if not self.is_supported_target(target):
+ raise UnsupportedRequest(f'Unsupported impersonate target: {target}')
+
+ def _check_extensions(self, extensions):
+ super()._check_extensions(extensions)
+ if 'impersonate' in extensions:
+ self._check_impersonate_target(extensions.get('impersonate'))
+
+ def _validate(self, request):
+ super()._validate(request)
+ self._check_impersonate_target(self.impersonate)
+
+ def _resolve_target(self, target: ImpersonateTarget | None):
+ """Resolve a target to a supported target."""
+ if target is None:
+ return
+ for supported_target in self.supported_targets:
+ if target in supported_target:
+ if self.verbose:
+ self._logger.stdout(
+ f'{self.RH_NAME}: resolved impersonate target {target} to {supported_target}')
+ return supported_target
+
+ @classproperty
+ def supported_targets(self) -> tuple[ImpersonateTarget, ...]:
+ return tuple(self._SUPPORTED_IMPERSONATE_TARGET_MAP.keys())
+
+ def is_supported_target(self, target: ImpersonateTarget):
+ assert isinstance(target, ImpersonateTarget)
+ return self._resolve_target(target) is not None
+
+ def _get_request_target(self, request):
+ """Get the requested target for the request"""
+ return self._resolve_target(request.extensions.get('impersonate') or self.impersonate)
+
+ def _get_impersonate_headers(self, request):
+ headers = self._merge_headers(request.headers)
+ if self._get_request_target(request) is not None:
+ # remove all headers present in std_headers
+ # todo: change this to not depend on std_headers
+ for k, v in std_headers.items():
+ if headers.get(k) == v:
+ headers.pop(k)
+ return headers
+
+
+@register_preference(ImpersonateRequestHandler)
+def impersonate_preference(rh, request):
+ if request.extensions.get('impersonate') or rh.impersonate:
+ return 1000
+ return 0
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index f884727..9615bfb 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -478,7 +478,7 @@ def create_parser():
}, 'aliases': {
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
- '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
+ '2021': ['2022', 'no-certifi', 'filename-sanitization'],
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
'2023': [],
}
@@ -516,6 +516,19 @@ def create_parser():
help='Client-side IP address to bind to',
)
network.add_option(
+ '--impersonate',
+ metavar='CLIENT[:OS]', dest='impersonate', default=None,
+ help=(
+ 'Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. '
+ 'Pass --impersonate="" to impersonate any client. Note that forcing impersonation '
+ 'for all requests may have a detrimental impact on download speed and stability'),
+ )
+ network.add_option(
+ '--list-impersonate-targets',
+ dest='list_impersonate_targets', default=False, action='store_true',
+ help='List available clients to impersonate.',
+ )
+ network.add_option(
'-4', '--force-ipv4',
action='store_const', const='0.0.0.0', dest='source_address',
help='Make all connections via IPv4',
@@ -680,6 +693,10 @@ def create_parser():
action='store_true', dest='break_on_existing', default=False,
help='Stop the download process when encountering a file that is in the archive')
selection.add_option(
+ '--no-break-on-existing',
+ action='store_false', dest='break_on_existing',
+ help='Do not stop the download process when encountering a file that is in the archive (default)')
+ selection.add_option(
'--break-on-reject',
action='store_true', dest='break_on_reject', default=False,
help=optparse.SUPPRESS_HELP)
@@ -1243,6 +1260,10 @@ def create_parser():
# TODO: Document the fields inside "progress"
'--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"'))
verbosity.add_option(
+ '--progress-delta',
+ metavar='SECONDS', action='store', dest='progress_delta', type=float, default=0,
+ help='Time between progress output (default: 0)')
+ verbosity.add_option(
'-v', '--verbose',
action='store_true', dest='verbose', default=False,
help='Print various debugging information')
diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index db50cfa..ca70f69 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -69,6 +69,10 @@ def _get_variant_and_executable_path():
# Ref: https://en.wikipedia.org/wiki/Uname#Examples
if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
+ # sys.executable returns a /tmp/ path for staticx builds (linux_static)
+ # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
+ if static_exe_path := os.getenv('STATICX_PROG_PATH'):
+ path = static_exe_path
return f'{remove_end(sys.platform, "32")}{machine}_exe', path
path = os.path.dirname(__file__)
@@ -114,7 +118,7 @@ _NON_UPDATEABLE_REASONS = {
**{variant: f'Auto-update is not supported for unpackaged {name} executable; Re-download the latest release'
for variant, name in {'win32_dir': 'Windows', 'darwin_dir': 'MacOS', 'linux_dir': 'Linux'}.items()},
'source': 'You cannot update when running from source code; Use git to pull the latest changes',
- 'unknown': 'You installed yt-dlp with a package manager or setup.py; Use that to update',
+ 'unknown': 'You installed yt-dlp from a manual build or with a package manager; Use that to update',
'other': 'You are using an unofficial build of yt-dlp; Build the executable again',
}
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 9efeb6a..42803bb 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -5,7 +5,7 @@ import codecs
import collections
import collections.abc
import contextlib
-import datetime
+import datetime as dt
import email.header
import email.utils
import errno
@@ -50,7 +50,6 @@ from ..compat import (
compat_expanduser,
compat_HTMLParseError,
compat_os_name,
- compat_shlex_quote,
)
from ..dependencies import xattr
@@ -836,9 +835,11 @@ class Popen(subprocess.Popen):
if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
if not isinstance(args, str):
- args = ' '.join(compat_shlex_quote(a) for a in args)
+ args = shell_quote(args, shell=True)
shell = False
- args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
+ # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
+ env['='] = '"^\n\n"'
+ args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'
super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)
@@ -1133,7 +1134,7 @@ def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))
-def extract_timezone(date_str):
+def extract_timezone(date_str, default=None):
m = re.search(
r'''(?x)
^.{8,}? # >=8 char non-TZ prefix, if present
@@ -1145,21 +1146,25 @@ def extract_timezone(date_str):
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
$)
''', date_str)
+ timezone = None
+
if not m:
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
if timezone is not None:
date_str = date_str[:-len(m.group('tz'))]
- timezone = datetime.timedelta(hours=timezone or 0)
+ timezone = dt.timedelta(hours=timezone)
else:
date_str = date_str[:-len(m.group('tz'))]
- if not m.group('sign'):
- timezone = datetime.timedelta()
- else:
+ if m.group('sign'):
sign = 1 if m.group('sign') == '+' else -1
- timezone = datetime.timedelta(
+ timezone = dt.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
+
+ if timezone is None and default is not NO_DEFAULT:
+ timezone = default or dt.timedelta()
+
return timezone, date_str
@@ -1171,13 +1176,12 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
date_str = re.sub(r'\.[0-9]+', '', date_str)
- if timezone is None:
- timezone, date_str = extract_timezone(date_str)
+ timezone, date_str = extract_timezone(date_str, timezone)
- with contextlib.suppress(ValueError):
+ with contextlib.suppress(ValueError, TypeError):
date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
- dt = datetime.datetime.strptime(date_str, date_format) - timezone
- return calendar.timegm(dt.timetuple())
+ dt_ = dt.datetime.strptime(date_str, date_format) - timezone
+ return calendar.timegm(dt_.timetuple())
def date_formats(day_first=True):
@@ -1198,12 +1202,12 @@ def unified_strdate(date_str, day_first=True):
for expression in date_formats(day_first):
with contextlib.suppress(ValueError):
- upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+ upload_date = dt.datetime.strptime(date_str, expression).strftime('%Y%m%d')
if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
with contextlib.suppress(ValueError):
- upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+ upload_date = dt.datetime(*timetuple[:6]).strftime('%Y%m%d')
if upload_date is not None:
return str(upload_date)
@@ -1233,8 +1237,8 @@ def unified_timestamp(date_str, day_first=True):
for expression in date_formats(day_first):
with contextlib.suppress(ValueError):
- dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
- return calendar.timegm(dt.timetuple())
+ dt_ = dt.datetime.strptime(date_str, expression) - timezone + dt.timedelta(hours=pm_delta)
+ return calendar.timegm(dt_.timetuple())
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
@@ -1272,11 +1276,11 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
if precision == 'auto':
auto_precision = True
precision = 'microsecond'
- today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
+ today = datetime_round(dt.datetime.now(dt.timezone.utc), precision)
if date_str in ('now', 'today'):
return today
if date_str == 'yesterday':
- return today - datetime.timedelta(days=1)
+ return today - dt.timedelta(days=1)
match = re.match(
r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
date_str)
@@ -1291,13 +1295,13 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
if unit == 'week':
unit = 'day'
time *= 7
- delta = datetime.timedelta(**{unit + 's': time})
+ delta = dt.timedelta(**{unit + 's': time})
new_date = start_time + delta
if auto_precision:
return datetime_round(new_date, unit)
return new_date
- return datetime_round(datetime.datetime.strptime(date_str, format), precision)
+ return datetime_round(dt.datetime.strptime(date_str, format), precision)
def date_from_str(date_str, format='%Y%m%d', strict=False):
@@ -1312,21 +1316,21 @@ def date_from_str(date_str, format='%Y%m%d', strict=False):
return datetime_from_str(date_str, precision='microsecond', format=format).date()
-def datetime_add_months(dt, months):
+def datetime_add_months(dt_, months):
"""Increment/Decrement a datetime object by months."""
- month = dt.month + months - 1
- year = dt.year + month // 12
+ month = dt_.month + months - 1
+ year = dt_.year + month // 12
month = month % 12 + 1
- day = min(dt.day, calendar.monthrange(year, month)[1])
- return dt.replace(year, month, day)
+ day = min(dt_.day, calendar.monthrange(year, month)[1])
+ return dt_.replace(year, month, day)
-def datetime_round(dt, precision='day'):
+def datetime_round(dt_, precision='day'):
"""
Round a datetime object's time to a specific precision
"""
if precision == 'microsecond':
- return dt
+ return dt_
unit_seconds = {
'day': 86400,
@@ -1335,8 +1339,8 @@ def datetime_round(dt, precision='day'):
'second': 1,
}
roundto = lambda x, n: ((x + n / 2) // n) * n
- timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
- return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
+ timestamp = roundto(calendar.timegm(dt_.timetuple()), unit_seconds[precision])
+ return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)
def hyphenate_date(date_str):
@@ -1357,11 +1361,11 @@ class DateRange:
if start is not None:
self.start = date_from_str(start, strict=True)
else:
- self.start = datetime.datetime.min.date()
+ self.start = dt.datetime.min.date()
if end is not None:
self.end = date_from_str(end, strict=True)
else:
- self.end = datetime.datetime.max.date()
+ self.end = dt.datetime.max.date()
if self.start > self.end:
raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
@@ -1372,7 +1376,7 @@ class DateRange:
def __contains__(self, date):
"""Check if the date is in the range"""
- if not isinstance(date, datetime.date):
+ if not isinstance(date, dt.date):
date = date_from_str(date)
return self.start <= date <= self.end
@@ -1637,15 +1641,31 @@ def get_filesystem_encoding():
return encoding if encoding is not None else 'utf-8'
-def shell_quote(args):
- quoted_args = []
- encoding = get_filesystem_encoding()
- for a in args:
- if isinstance(a, bytes):
- # We may get a filename encoded with 'encodeFilename'
- a = a.decode(encoding)
- quoted_args.append(compat_shlex_quote(a))
- return ' '.join(quoted_args)
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
+_CMD_QUOTE_TRANS = str.maketrans({
+ # Keep quotes balanced by replacing them with `""` instead of `\\"`
+ '"': '""',
+ # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+ # `=` should be unique since variables containing `=` cannot be set using cmd
+ '\n': '%=%',
+ '\r': '%=%',
+ # Use zero length variable replacement so `%` doesn't get expanded
+ # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
+ '%': '%%cd:~,%',
+})
+
+
+def shell_quote(args, *, shell=False):
+ args = list(variadic(args))
+
+ if compat_os_name != 'nt':
+ return shlex.join(args)
+
+ trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
+ return ' '.join(
+ s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
+ else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
+ for s in args)
def smuggle_url(url, data):
@@ -1996,12 +2016,12 @@ def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
if isinstance(timestamp, (int, float)): # unix timestamp
# Using naive datetime here can break timestamp() in Windows
# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
- # Also, datetime.datetime.fromtimestamp breaks for negative timestamps
+ # Also, dt.datetime.fromtimestamp breaks for negative timestamps
# Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
- datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
- + datetime.timedelta(seconds=timestamp))
+ datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
+ + dt.timedelta(seconds=timestamp))
elif isinstance(timestamp, str): # assume YYYYMMDD
- datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
+ datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
date_format = re.sub( # Support %s on windows
r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
return datetime_object.strftime(date_format)
@@ -2505,7 +2525,7 @@ def read_batch_urls(batch_fd):
return False
# "#" cannot be stripped out since it is part of the URI
# However, it can be safely stripped out if following a whitespace
- return re.split(r'\s#', url, 1)[0].rstrip()
+ return re.split(r'\s#', url, maxsplit=1)[0].rstrip()
with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url]
@@ -2849,7 +2869,7 @@ def ytdl_is_updateable():
def args_to_str(args):
# Get a short string representation for a subprocess command
- return ' '.join(compat_shlex_quote(a) for a in args)
+ return shell_quote(args)
def error_to_str(err):
@@ -4490,10 +4510,10 @@ def write_xattr(path, key, value):
def random_birthday(year_field, month_field, day_field):
- start_date = datetime.date(1950, 1, 1)
- end_date = datetime.date(1995, 12, 31)
+ start_date = dt.date(1950, 1, 1)
+ end_date = dt.date(1995, 12, 31)
offset = random.randint(0, (end_date - start_date).days)
- random_date = start_date + datetime.timedelta(offset)
+ random_date = start_date + dt.timedelta(offset)
return {
year_field: str(random_date.year),
month_field: str(random_date.month),
@@ -4672,7 +4692,7 @@ def time_seconds(**kwargs):
"""
Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)
"""
- return time.time() + datetime.timedelta(**kwargs).total_seconds()
+ return time.time() + dt.timedelta(**kwargs).total_seconds()
# create a JSON Web Signature (jws) with HS256 algorithm
@@ -5415,6 +5435,17 @@ class FormatSorter:
return tuple(self._calculate_field_preference(format, field) for field in self._order)
+def filesize_from_tbr(tbr, duration):
+ """
+ @param tbr: Total bitrate in kbps (1000 bits/sec)
+ @param duration: Duration in seconds
+ @returns Filesize in bytes
+ """
+ if tbr is None or duration is None:
+ return None
+ return int(duration * tbr * (1000 / 8))
+
+
# XXX: Temporary
class _YDLLogger:
def __init__(self, ydl=None):
diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py
index 8938f4c..96eb2ed 100644
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -1,5 +1,6 @@
import collections.abc
import contextlib
+import http.cookies
import inspect
import itertools
import re
@@ -28,7 +29,8 @@ def traverse_obj(
Each of the provided `paths` is tested and the first producing a valid result will be returned.
The next path will also be tested if the path branched but no results could be found.
- Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
+ Supported values for traversal are `Mapping`, `Iterable`, `re.Match`,
+ `xml.etree.ElementTree.Element` (xpath) and `http.cookies.Morsel`.
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
@@ -36,8 +38,8 @@ def traverse_obj(
The keys in the path can be one of:
- `None`: Return the current object.
- `set`: Requires the only item in the set to be a type or function,
- like `{type}`/`{func}`. If a `type`, returns only values
- of this type. If a function, returns `func(obj)`.
+ like `{type}`/`{type, type, ...}`/`{func}`. If a `type`, return only
+ values of this type. If a function, returns `func(obj)`.
- `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
- `slice`: Branch out and return all values in `obj[key]`.
- `Ellipsis`: Branch out and return a list of all values.
@@ -48,8 +50,10 @@ def traverse_obj(
For `Iterable`s, `key` is the index of the value.
For `re.Match`es, `key` is the group number (0 = full match)
as well as additionally any group names, if given.
- - `dict` Transform the current object and return a matching dict.
+ - `dict`: Transform the current object and return a matching dict.
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
+ - `any`-builtin: Take the first matching object and return it, resetting branching.
+ - `all`-builtin: Take all matching objects and return them as a list, resetting branching.
`tuple`, `list`, and `dict` all support nested paths and branches.
@@ -102,10 +106,10 @@ def traverse_obj(
result = obj
elif isinstance(key, set):
- assert len(key) == 1, 'Set should only be used to wrap a single item'
item = next(iter(key))
- if isinstance(item, type):
- if isinstance(obj, item):
+ if len(key) > 1 or isinstance(item, type):
+ assert all(isinstance(item, type) for item in key)
+ if isinstance(obj, tuple(key)):
result = obj
else:
result = try_call(item, args=(obj,))
@@ -117,6 +121,8 @@ def traverse_obj(
elif key is ...:
branching = True
+ if isinstance(obj, http.cookies.Morsel):
+ obj = dict(obj, key=obj.key, value=obj.value)
if isinstance(obj, collections.abc.Mapping):
result = obj.values()
elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
@@ -131,6 +137,8 @@ def traverse_obj(
elif callable(key):
branching = True
+ if isinstance(obj, http.cookies.Morsel):
+ obj = dict(obj, key=obj.key, value=obj.value)
if isinstance(obj, collections.abc.Mapping):
iter_obj = obj.items()
elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
@@ -157,6 +165,8 @@ def traverse_obj(
} or None
elif isinstance(obj, collections.abc.Mapping):
+ if isinstance(obj, http.cookies.Morsel):
+ obj = dict(obj, key=obj.key, value=obj.value)
result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
next((v for k, v in obj.items() if casefold(k) == key), None))
@@ -179,7 +189,7 @@ def traverse_obj(
elif isinstance(obj, xml.etree.ElementTree.Element) and isinstance(key, str):
xpath, _, special = key.rpartition('/')
- if not special.startswith('@') and special != 'text()':
+ if not special.startswith('@') and not special.endswith('()'):
xpath = key
special = None
@@ -198,7 +208,7 @@ def traverse_obj(
return try_call(element.attrib.get, args=(special[1:],))
if special == 'text()':
return element.text
- assert False, f'apply_specials is missing case for {special!r}'
+ raise SyntaxError(f'apply_specials is missing case for {special!r}')
if xpath:
result = list(map(apply_specials, obj.iterfind(xpath)))
@@ -228,6 +238,15 @@ def traverse_obj(
if not casesense and isinstance(key, str):
key = key.casefold()
+ if key in (any, all):
+ has_branched = False
+ filtered_objs = (obj for obj in objs if obj not in (None, {}))
+ if key is any:
+ objs = (next(filtered_objs, None),)
+ else:
+ objs = (list(filtered_objs),)
+ continue
+
if __debug__ and callable(key):
# Verify function signature
inspect.signature(key).bind(None, None)
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 68c3f00..a90b288 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2024.03.10'
+__version__ = '2024.05.27'
-RELEASE_GIT_HEAD = '615a84447e8322720be77a0e64298d7f42848693'
+RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b'
VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp'
-_pkg_version = '2024.03.10'
+_pkg_version = '2024.05.27'