From 7e21328585afda6d66f98ca476301680eeffac32 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 5 Aug 2024 11:06:10 +0200 Subject: Adding upstream version 2024.07.01. Signed-off-by: Daniel Baumann --- yt_dlp/YoutubeDL.py | 337 +++++----- yt_dlp/__init__.py | 47 +- yt_dlp/aes.py | 36 +- yt_dlp/cache.py | 4 +- yt_dlp/compat/_legacy.py | 2 +- yt_dlp/compat/functools.py | 2 +- yt_dlp/compat/imghdr.py | 26 +- yt_dlp/cookies.py | 125 ++-- yt_dlp/downloader/common.py | 2 +- yt_dlp/downloader/external.py | 28 +- yt_dlp/downloader/f4m.py | 22 +- yt_dlp/downloader/fragment.py | 8 +- yt_dlp/downloader/hls.py | 33 +- yt_dlp/downloader/http.py | 14 +- yt_dlp/downloader/ism.py | 2 +- yt_dlp/downloader/mhtml.py | 53 +- yt_dlp/downloader/niconico.py | 14 +- yt_dlp/downloader/rtmp.py | 6 +- yt_dlp/downloader/youtube_live_chat.py | 2 +- yt_dlp/extractor/_extractors.py | 29 +- yt_dlp/extractor/abc.py | 28 +- yt_dlp/extractor/abcnews.py | 2 +- yt_dlp/extractor/abcotvs.py | 5 +- yt_dlp/extractor/abematv.py | 22 +- yt_dlp/extractor/acast.py | 8 +- yt_dlp/extractor/acfun.py | 4 +- yt_dlp/extractor/adn.py | 19 +- yt_dlp/extractor/adobeconnect.py | 10 +- yt_dlp/extractor/adobepass.py | 866 ++++++++++++------------- yt_dlp/extractor/adobetv.py | 5 +- yt_dlp/extractor/adultswim.py | 6 +- yt_dlp/extractor/aenetworks.py | 34 +- yt_dlp/extractor/aeonco.py | 8 +- yt_dlp/extractor/afreecatv.py | 45 +- yt_dlp/extractor/agora.py | 6 +- yt_dlp/extractor/airtv.py | 4 +- yt_dlp/extractor/aitube.py | 2 +- yt_dlp/extractor/aliexpress.py | 3 +- yt_dlp/extractor/aljazeera.py | 14 +- yt_dlp/extractor/allocine.py | 5 +- yt_dlp/extractor/allstar.py | 26 +- yt_dlp/extractor/alphaporno.py | 2 +- yt_dlp/extractor/alsace20tv.py | 6 +- yt_dlp/extractor/altcensored.py | 2 +- yt_dlp/extractor/alura.py | 16 +- yt_dlp/extractor/amadeustv.py | 2 +- yt_dlp/extractor/amara.py | 10 +- yt_dlp/extractor/amazon.py | 8 +- yt_dlp/extractor/amazonminitv.py | 2 +- yt_dlp/extractor/amcnetworks.py | 12 +- yt_dlp/extractor/americastestkitchen.py | 14 +- yt_dlp/extractor/amp.py | 4 +- yt_dlp/extractor/anchorfm.py | 6 +- yt_dlp/extractor/angel.py | 10 +- yt_dlp/extractor/antenna.py | 2 +- yt_dlp/extractor/anvato.py | 8 +- yt_dlp/extractor/aol.py | 8 +- yt_dlp/extractor/apa.py | 4 +- yt_dlp/extractor/applepodcasts.py | 2 +- yt_dlp/extractor/appletrailers.py | 31 +- yt_dlp/extractor/archiveorg.py | 105 +-- yt_dlp/extractor/arcpublishing.py | 11 +- yt_dlp/extractor/ard.py | 6 +- yt_dlp/extractor/arkena.py | 6 +- yt_dlp/extractor/arnes.py | 12 +- yt_dlp/extractor/art19.py | 2 +- yt_dlp/extractor/arte.py | 24 +- yt_dlp/extractor/atresplayer.py | 23 +- yt_dlp/extractor/atscaleconf.py | 10 +- yt_dlp/extractor/atvat.py | 16 +- yt_dlp/extractor/audimedia.py | 4 +- yt_dlp/extractor/audioboom.py | 4 +- yt_dlp/extractor/audiodraft.py | 13 +- yt_dlp/extractor/audiomack.py | 27 +- yt_dlp/extractor/audius.py | 46 +- yt_dlp/extractor/awaan.py | 27 +- yt_dlp/extractor/aws.py | 24 +- yt_dlp/extractor/azmedien.py | 6 +- yt_dlp/extractor/baidu.py | 7 +- yt_dlp/extractor/banbye.py | 9 +- yt_dlp/extractor/bandcamp.py | 31 +- yt_dlp/extractor/bannedvideo.py | 12 +- yt_dlp/extractor/bbc.py | 110 ++-- yt_dlp/extractor/beatport.py | 7 +- yt_dlp/extractor/beeg.py | 8 +- yt_dlp/extractor/behindkink.py | 2 +- yt_dlp/extractor/bellmedia.py | 2 +- yt_dlp/extractor/berufetv.py | 4 +- yt_dlp/extractor/bet.py | 8 +- yt_dlp/extractor/bfmtv.py | 4 +- yt_dlp/extractor/bigflix.py | 14 +- yt_dlp/extractor/bigo.py | 2 +- yt_dlp/extractor/bild.py | 4 +- yt_dlp/extractor/bilibili.py | 494 +++++++++----- yt_dlp/extractor/bitchute.py | 30 +- yt_dlp/extractor/blackboardcollaborate.py | 2 +- yt_dlp/extractor/bleacherreport.py | 12 +- yt_dlp/extractor/blerp.py | 25 +- yt_dlp/extractor/blogger.py | 4 +- yt_dlp/extractor/bloomberg.py | 2 +- yt_dlp/extractor/bokecc.py | 15 +- yt_dlp/extractor/bongacams.py | 9 +- yt_dlp/extractor/bostonglobe.py | 3 +- yt_dlp/extractor/box.py | 6 +- yt_dlp/extractor/boxcast.py | 10 +- yt_dlp/extractor/br.py | 8 +- yt_dlp/extractor/brainpop.py | 14 +- yt_dlp/extractor/bravotv.py | 2 +- yt_dlp/extractor/breitbart.py | 4 +- yt_dlp/extractor/brightcove.py | 84 ++- yt_dlp/extractor/bundesliga.py | 10 +- yt_dlp/extractor/businessinsider.py | 4 +- yt_dlp/extractor/buzzfeed.py | 6 +- yt_dlp/extractor/byutv.py | 2 +- yt_dlp/extractor/c56.py | 4 +- yt_dlp/extractor/callin.py | 16 +- yt_dlp/extractor/caltrans.py | 2 +- yt_dlp/extractor/cam4.py | 4 +- yt_dlp/extractor/camdemy.py | 33 +- yt_dlp/extractor/camfm.py | 4 +- yt_dlp/extractor/cammodels.py | 8 +- yt_dlp/extractor/camtasia.py | 6 +- yt_dlp/extractor/canalalpha.py | 8 +- yt_dlp/extractor/canalc2.py | 2 +- yt_dlp/extractor/canalplus.py | 5 +- yt_dlp/extractor/caracoltv.py | 4 +- yt_dlp/extractor/cartoonnetwork.py | 2 +- yt_dlp/extractor/cbc.py | 57 +- yt_dlp/extractor/cbs.py | 6 +- yt_dlp/extractor/cbsnews.py | 1 - yt_dlp/extractor/ccc.py | 6 +- yt_dlp/extractor/ccma.py | 6 +- yt_dlp/extractor/cctv.py | 7 +- yt_dlp/extractor/cda.py | 29 +- yt_dlp/extractor/cellebrite.py | 4 +- yt_dlp/extractor/ceskatelevize.py | 24 +- yt_dlp/extractor/cgtn.py | 10 +- yt_dlp/extractor/chaturbate.py | 6 +- yt_dlp/extractor/cinemax.py | 2 +- yt_dlp/extractor/cinetecamilano.py | 8 +- yt_dlp/extractor/cineverse.py | 10 +- yt_dlp/extractor/ciscolive.py | 4 +- yt_dlp/extractor/ciscowebex.py | 4 +- yt_dlp/extractor/cjsw.py | 2 +- yt_dlp/extractor/clippit.py | 4 +- yt_dlp/extractor/cliprs.py | 2 +- yt_dlp/extractor/closertotruth.py | 10 +- yt_dlp/extractor/cloudflarestream.py | 19 +- yt_dlp/extractor/cloudycdn.py | 25 +- yt_dlp/extractor/clubic.py | 4 +- yt_dlp/extractor/clyp.py | 6 +- yt_dlp/extractor/cmt.py | 4 +- yt_dlp/extractor/cnn.py | 8 +- yt_dlp/extractor/common.py | 148 +++-- yt_dlp/extractor/commonmistakes.py | 6 +- yt_dlp/extractor/commonprotocols.py | 2 +- yt_dlp/extractor/condenast.py | 33 +- yt_dlp/extractor/contv.py | 2 +- yt_dlp/extractor/corus.py | 12 +- yt_dlp/extractor/coub.py | 8 +- yt_dlp/extractor/cozytv.py | 10 +- yt_dlp/extractor/cpac.py | 24 +- yt_dlp/extractor/cracked.py | 4 +- yt_dlp/extractor/crackle.py | 14 +- yt_dlp/extractor/craftsy.py | 2 +- yt_dlp/extractor/crooksandliars.py | 4 +- yt_dlp/extractor/crowdbunker.py | 28 +- yt_dlp/extractor/crtvg.py | 4 +- yt_dlp/extractor/crunchyroll.py | 6 +- yt_dlp/extractor/cspan.py | 26 +- yt_dlp/extractor/ctsnews.py | 4 +- yt_dlp/extractor/ctv.py | 4 +- yt_dlp/extractor/ctvnews.py | 6 +- yt_dlp/extractor/cultureunplugged.py | 8 +- yt_dlp/extractor/curiositystream.py | 9 +- yt_dlp/extractor/cwtv.py | 4 +- yt_dlp/extractor/cybrary.py | 20 +- yt_dlp/extractor/dailymail.py | 10 +- yt_dlp/extractor/dailymotion.py | 16 +- yt_dlp/extractor/dailywire.py | 6 +- yt_dlp/extractor/damtomo.py | 9 +- yt_dlp/extractor/daum.py | 28 +- yt_dlp/extractor/dbtv.py | 2 +- yt_dlp/extractor/dctp.py | 11 +- yt_dlp/extractor/deezer.py | 6 +- yt_dlp/extractor/democracynow.py | 8 +- yt_dlp/extractor/detik.py | 20 +- yt_dlp/extractor/deuxm.py | 16 +- yt_dlp/extractor/dfb.py | 4 +- yt_dlp/extractor/digitalconcerthall.py | 67 +- yt_dlp/extractor/digiteka.py | 2 +- yt_dlp/extractor/discovery.py | 10 +- yt_dlp/extractor/discoverygo.py | 5 +- yt_dlp/extractor/disney.py | 8 +- yt_dlp/extractor/dispeak.py | 10 +- yt_dlp/extractor/dlf.py | 36 +- yt_dlp/extractor/dlive.py | 8 +- yt_dlp/extractor/douyutv.py | 10 +- yt_dlp/extractor/dplay.py | 16 +- yt_dlp/extractor/drbonanza.py | 2 +- yt_dlp/extractor/dreisat.py | 4 +- yt_dlp/extractor/drooble.py | 6 +- yt_dlp/extractor/dropbox.py | 14 +- yt_dlp/extractor/dropout.py | 34 +- yt_dlp/extractor/drtuber.py | 10 +- yt_dlp/extractor/drtv.py | 18 +- yt_dlp/extractor/dtube.py | 6 +- yt_dlp/extractor/duboku.py | 29 +- yt_dlp/extractor/dumpert.py | 4 +- yt_dlp/extractor/dvtv.py | 14 +- yt_dlp/extractor/dw.py | 15 +- yt_dlp/extractor/eagleplatform.py | 20 +- yt_dlp/extractor/ebaumsworld.py | 2 +- yt_dlp/extractor/ebay.py | 4 +- yt_dlp/extractor/egghead.py | 11 +- yt_dlp/extractor/eighttracks.py | 49 +- yt_dlp/extractor/eitb.py | 18 +- yt_dlp/extractor/elpais.py | 4 +- yt_dlp/extractor/eltrecetv.py | 4 +- yt_dlp/extractor/epicon.py | 29 +- yt_dlp/extractor/epoch.py | 10 +- yt_dlp/extractor/eporner.py | 13 +- yt_dlp/extractor/erocast.py | 2 +- yt_dlp/extractor/eroprofile.py | 6 +- yt_dlp/extractor/err.py | 2 +- yt_dlp/extractor/ertgr.py | 17 +- yt_dlp/extractor/espn.py | 41 +- yt_dlp/extractor/ettutv.py | 2 +- yt_dlp/extractor/europa.py | 24 +- yt_dlp/extractor/europeantour.py | 8 +- yt_dlp/extractor/eurosport.py | 10 +- yt_dlp/extractor/euscreen.py | 18 +- yt_dlp/extractor/expressen.py | 2 +- yt_dlp/extractor/eyedotv.py | 12 +- yt_dlp/extractor/facebook.py | 52 +- yt_dlp/extractor/fancode.py | 35 +- yt_dlp/extractor/fc2.py | 18 +- yt_dlp/extractor/filmon.py | 11 +- yt_dlp/extractor/filmweb.py | 2 +- yt_dlp/extractor/firsttv.py | 27 +- yt_dlp/extractor/flickr.py | 14 +- yt_dlp/extractor/floatplane.py | 2 +- yt_dlp/extractor/folketinget.py | 5 +- yt_dlp/extractor/footyroom.py | 2 +- yt_dlp/extractor/fourtube.py | 41 +- yt_dlp/extractor/fox.py | 15 +- yt_dlp/extractor/fptplay.py | 2 +- yt_dlp/extractor/francetv.py | 54 +- yt_dlp/extractor/freesound.py | 2 +- yt_dlp/extractor/freetv.py | 10 +- yt_dlp/extractor/frontendmasters.py | 31 +- yt_dlp/extractor/fujitv.py | 8 +- yt_dlp/extractor/funimation.py | 32 +- yt_dlp/extractor/funker530.py | 6 +- yt_dlp/extractor/fuyintv.py | 2 +- yt_dlp/extractor/gab.py | 22 +- yt_dlp/extractor/gaia.py | 14 +- yt_dlp/extractor/gamejolt.py | 34 +- yt_dlp/extractor/gamespot.py | 5 +- yt_dlp/extractor/gamestar.py | 6 +- yt_dlp/extractor/gaskrank.py | 4 +- yt_dlp/extractor/gazeta.py | 4 +- yt_dlp/extractor/gbnews.py | 14 +- yt_dlp/extractor/gdcvault.py | 10 +- yt_dlp/extractor/gedidigital.py | 4 +- yt_dlp/extractor/generic.py | 155 ++--- yt_dlp/extractor/genericembeds.py | 10 +- yt_dlp/extractor/getcourseru.py | 22 +- yt_dlp/extractor/gettr.py | 14 +- yt_dlp/extractor/giantbomb.py | 2 +- yt_dlp/extractor/gigya.py | 2 +- yt_dlp/extractor/glide.py | 2 +- yt_dlp/extractor/globalplayer.py | 4 +- yt_dlp/extractor/globo.py | 29 +- yt_dlp/extractor/glomex.py | 6 +- yt_dlp/extractor/gmanetwork.py | 4 +- yt_dlp/extractor/go.py | 20 +- yt_dlp/extractor/godresource.py | 10 +- yt_dlp/extractor/godtube.py | 4 +- yt_dlp/extractor/gofile.py | 8 +- yt_dlp/extractor/golem.py | 16 +- yt_dlp/extractor/googledrive.py | 20 +- yt_dlp/extractor/googlepodcasts.py | 2 +- yt_dlp/extractor/goplay.py | 143 ++-- yt_dlp/extractor/gopro.py | 8 +- yt_dlp/extractor/goshgay.py | 9 +- yt_dlp/extractor/gotostage.py | 19 +- yt_dlp/extractor/gputechconf.py | 4 +- yt_dlp/extractor/graspop.py | 32 + yt_dlp/extractor/gronkh.py | 14 +- yt_dlp/extractor/groupon.py | 3 +- yt_dlp/extractor/harpodeon.py | 6 +- yt_dlp/extractor/hbo.py | 9 +- yt_dlp/extractor/heise.py | 6 +- yt_dlp/extractor/hidive.py | 6 +- yt_dlp/extractor/historicfilms.py | 2 +- yt_dlp/extractor/hitrecord.py | 11 +- yt_dlp/extractor/hketv.py | 6 +- yt_dlp/extractor/hollywoodreporter.py | 2 +- yt_dlp/extractor/holodex.py | 2 +- yt_dlp/extractor/hotnewhiphop.py | 9 +- yt_dlp/extractor/hotstar.py | 11 +- yt_dlp/extractor/hrfensehen.py | 12 +- yt_dlp/extractor/hrti.py | 20 +- yt_dlp/extractor/hse.py | 4 +- yt_dlp/extractor/huajiao.py | 2 +- yt_dlp/extractor/huffpost.py | 2 +- yt_dlp/extractor/hungama.py | 6 +- yt_dlp/extractor/huya.py | 13 +- yt_dlp/extractor/hypem.py | 6 +- yt_dlp/extractor/hypergryph.py | 4 +- yt_dlp/extractor/hytale.py | 4 +- yt_dlp/extractor/icareus.py | 12 +- yt_dlp/extractor/ichinanalive.py | 15 +- yt_dlp/extractor/ign.py | 21 +- yt_dlp/extractor/iheart.py | 2 +- yt_dlp/extractor/ilpost.py | 2 +- yt_dlp/extractor/iltalehti.py | 2 +- yt_dlp/extractor/imdb.py | 8 +- yt_dlp/extractor/imggaming.py | 2 +- yt_dlp/extractor/imgur.py | 21 +- yt_dlp/extractor/ina.py | 2 +- yt_dlp/extractor/inc.py | 2 +- yt_dlp/extractor/indavideo.py | 4 +- yt_dlp/extractor/infoq.py | 12 +- yt_dlp/extractor/instagram.py | 56 +- yt_dlp/extractor/internazionale.py | 4 +- yt_dlp/extractor/iprima.py | 4 +- yt_dlp/extractor/iqiyi.py | 77 +-- yt_dlp/extractor/islamchannel.py | 2 +- yt_dlp/extractor/israelnationalnews.py | 6 +- yt_dlp/extractor/itprotv.py | 14 +- yt_dlp/extractor/itv.py | 33 +- yt_dlp/extractor/ivi.py | 26 +- yt_dlp/extractor/ivideon.py | 16 +- yt_dlp/extractor/iwara.py | 9 +- yt_dlp/extractor/ixigua.py | 2 +- yt_dlp/extractor/izlesene.py | 18 +- yt_dlp/extractor/jamendo.py | 32 +- yt_dlp/extractor/japandiet.py | 8 +- yt_dlp/extractor/jiocinema.py | 37 +- yt_dlp/extractor/jiosaavn.py | 2 +- yt_dlp/extractor/joj.py | 15 +- yt_dlp/extractor/jove.py | 4 +- yt_dlp/extractor/jwplatform.py | 4 +- yt_dlp/extractor/kakao.py | 10 +- yt_dlp/extractor/kaltura.py | 73 +-- yt_dlp/extractor/kankanews.py | 2 +- yt_dlp/extractor/karaoketv.py | 4 +- yt_dlp/extractor/kelbyone.py | 2 +- yt_dlp/extractor/khanacademy.py | 141 ++-- yt_dlp/extractor/kicker.py | 6 +- yt_dlp/extractor/kinja.py | 18 +- yt_dlp/extractor/kommunetv.py | 10 +- yt_dlp/extractor/kompas.py | 2 +- yt_dlp/extractor/koo.py | 27 +- yt_dlp/extractor/kth.py | 7 +- yt_dlp/extractor/ku6.py | 10 +- yt_dlp/extractor/kuwo.py | 42 +- yt_dlp/extractor/la7.py | 2 +- yt_dlp/extractor/laracasts.py | 114 ++++ yt_dlp/extractor/laxarxames.py | 2 +- yt_dlp/extractor/lbry.py | 16 +- yt_dlp/extractor/lcp.py | 2 +- yt_dlp/extractor/lecture2go.py | 2 +- yt_dlp/extractor/lecturio.py | 6 +- yt_dlp/extractor/leeco.py | 41 +- yt_dlp/extractor/lego.py | 6 +- yt_dlp/extractor/lenta.py | 2 +- yt_dlp/extractor/libraryofcongress.py | 2 +- yt_dlp/extractor/libsyn.py | 6 +- yt_dlp/extractor/lifenews.py | 27 +- yt_dlp/extractor/likee.py | 2 +- yt_dlp/extractor/limelight.py | 16 +- yt_dlp/extractor/linkedin.py | 27 +- yt_dlp/extractor/liputan6.py | 6 +- yt_dlp/extractor/listennotes.py | 6 +- yt_dlp/extractor/litv.py | 6 +- yt_dlp/extractor/livejournal.py | 5 +- yt_dlp/extractor/livestream.py | 42 +- yt_dlp/extractor/livestreamfails.py | 4 +- yt_dlp/extractor/lnkgo.py | 27 +- yt_dlp/extractor/lovehomeporn.py | 6 +- yt_dlp/extractor/lrt.py | 8 +- yt_dlp/extractor/lsm.py | 12 +- yt_dlp/extractor/lumni.py | 2 +- yt_dlp/extractor/lynda.py | 54 +- yt_dlp/extractor/magentamusik.py | 2 +- yt_dlp/extractor/mailru.py | 15 +- yt_dlp/extractor/mainstreaming.py | 28 +- yt_dlp/extractor/mangomolo.py | 13 +- yt_dlp/extractor/manoto.py | 12 +- yt_dlp/extractor/manyvids.py | 8 +- yt_dlp/extractor/markiza.py | 9 +- yt_dlp/extractor/massengeschmacktv.py | 2 +- yt_dlp/extractor/masters.py | 2 +- yt_dlp/extractor/matchtv.py | 40 +- yt_dlp/extractor/mdr.py | 7 +- yt_dlp/extractor/medaltv.py | 13 +- yt_dlp/extractor/mediaite.py | 14 +- yt_dlp/extractor/mediaklikk.py | 39 +- yt_dlp/extractor/mediaset.py | 6 +- yt_dlp/extractor/mediasite.py | 96 +-- yt_dlp/extractor/mediaworksnz.py | 10 +- yt_dlp/extractor/meipai.py | 4 +- yt_dlp/extractor/melonvod.py | 4 +- yt_dlp/extractor/metacritic.py | 6 +- yt_dlp/extractor/mgtv.py | 6 +- yt_dlp/extractor/microsoftembed.py | 262 +++++++- yt_dlp/extractor/microsoftstream.py | 6 +- yt_dlp/extractor/microsoftvirtualacademy.py | 189 ------ yt_dlp/extractor/mildom.py | 10 +- yt_dlp/extractor/minds.py | 11 +- yt_dlp/extractor/minoto.py | 2 +- yt_dlp/extractor/mirrativ.py | 6 +- yt_dlp/extractor/mit.py | 8 +- yt_dlp/extractor/mixch.py | 4 +- yt_dlp/extractor/mixcloud.py | 37 +- yt_dlp/extractor/mlb.py | 34 +- yt_dlp/extractor/mlssoccer.py | 69 +- yt_dlp/extractor/mocha.py | 4 +- yt_dlp/extractor/mojvideo.py | 6 +- yt_dlp/extractor/monstercat.py | 6 +- yt_dlp/extractor/motherless.py | 6 +- yt_dlp/extractor/motorsport.py | 11 +- yt_dlp/extractor/moview.py | 6 +- yt_dlp/extractor/moviezine.py | 2 +- yt_dlp/extractor/movingimage.py | 2 +- yt_dlp/extractor/msn.py | 5 +- yt_dlp/extractor/mtv.py | 50 +- yt_dlp/extractor/muenchentv.py | 8 +- yt_dlp/extractor/murrtube.py | 6 +- yt_dlp/extractor/musescore.py | 12 +- yt_dlp/extractor/musicdex.py | 50 +- yt_dlp/extractor/mx3.py | 10 +- yt_dlp/extractor/mxplayer.py | 25 +- yt_dlp/extractor/myspace.py | 14 +- yt_dlp/extractor/myspass.py | 3 +- yt_dlp/extractor/mzaalo.py | 6 +- yt_dlp/extractor/n1.py | 6 +- yt_dlp/extractor/nate.py | 24 +- yt_dlp/extractor/nationalgeographic.py | 2 +- yt_dlp/extractor/naver.py | 12 +- yt_dlp/extractor/nba.py | 23 +- yt_dlp/extractor/nbc.py | 29 +- yt_dlp/extractor/ndr.py | 18 +- yt_dlp/extractor/ndtv.py | 26 +- yt_dlp/extractor/nekohacker.py | 32 +- yt_dlp/extractor/neteasemusic.py | 196 +++--- yt_dlp/extractor/netverse.py | 14 +- yt_dlp/extractor/netzkino.py | 6 +- yt_dlp/extractor/newgrounds.py | 10 +- yt_dlp/extractor/newsy.py | 4 +- yt_dlp/extractor/nextmedia.py | 17 +- yt_dlp/extractor/nexx.py | 92 ++- yt_dlp/extractor/nfhsnetwork.py | 52 +- yt_dlp/extractor/nfl.py | 4 +- yt_dlp/extractor/nhk.py | 266 +++++--- yt_dlp/extractor/nhl.py | 10 +- yt_dlp/extractor/nick.py | 18 +- yt_dlp/extractor/niconico.py | 74 +-- yt_dlp/extractor/niconicochannelplus.py | 4 +- yt_dlp/extractor/ninaprotocol.py | 10 +- yt_dlp/extractor/ninecninemedia.py | 10 +- yt_dlp/extractor/ninegag.py | 6 +- yt_dlp/extractor/ninenews.py | 4 +- yt_dlp/extractor/ninenow.py | 21 +- yt_dlp/extractor/nintendo.py | 2 +- yt_dlp/extractor/nitter.py | 18 +- yt_dlp/extractor/nobelprize.py | 2 +- yt_dlp/extractor/noice.py | 6 +- yt_dlp/extractor/nonktube.py | 2 +- yt_dlp/extractor/noodlemagazine.py | 6 +- yt_dlp/extractor/noovo.py | 7 +- yt_dlp/extractor/nosnl.py | 6 +- yt_dlp/extractor/nova.py | 8 +- yt_dlp/extractor/novaplay.py | 4 +- yt_dlp/extractor/nowness.py | 7 +- yt_dlp/extractor/noz.py | 9 +- yt_dlp/extractor/npo.py | 40 +- yt_dlp/extractor/npr.py | 4 +- yt_dlp/extractor/nrk.py | 72 +- yt_dlp/extractor/ntvru.py | 6 +- yt_dlp/extractor/nubilesporn.py | 6 +- yt_dlp/extractor/nuevo.py | 2 +- yt_dlp/extractor/nuum.py | 6 +- yt_dlp/extractor/nuvid.py | 8 +- yt_dlp/extractor/nytimes.py | 2 +- yt_dlp/extractor/nzherald.py | 21 +- yt_dlp/extractor/nzonscreen.py | 2 +- yt_dlp/extractor/odkmedia.py | 4 +- yt_dlp/extractor/odnoklassniki.py | 19 +- yt_dlp/extractor/oftv.py | 8 +- yt_dlp/extractor/oktoberfesttv.py | 2 +- yt_dlp/extractor/olympics.py | 8 +- yt_dlp/extractor/on24.py | 6 +- yt_dlp/extractor/onefootball.py | 2 +- yt_dlp/extractor/onenewsnz.py | 10 +- yt_dlp/extractor/oneplace.py | 4 +- yt_dlp/extractor/onet.py | 6 +- yt_dlp/extractor/onionstudios.py | 3 +- yt_dlp/extractor/opencast.py | 2 +- yt_dlp/extractor/openload.py | 10 +- yt_dlp/extractor/openrec.py | 7 +- yt_dlp/extractor/ora.py | 8 +- yt_dlp/extractor/orf.py | 160 +++-- yt_dlp/extractor/outsidetv.py | 2 +- yt_dlp/extractor/packtpub.py | 9 +- yt_dlp/extractor/palcomp3.py | 11 +- yt_dlp/extractor/panopto.py | 66 +- yt_dlp/extractor/paramountplus.py | 8 +- yt_dlp/extractor/parler.py | 2 +- yt_dlp/extractor/parlview.py | 7 +- yt_dlp/extractor/patreon.py | 43 +- yt_dlp/extractor/pbs.py | 37 +- yt_dlp/extractor/pearvideo.py | 4 +- yt_dlp/extractor/peertube.py | 71 +- yt_dlp/extractor/peertv.py | 2 +- yt_dlp/extractor/peloton.py | 26 +- yt_dlp/extractor/performgroup.py | 11 +- yt_dlp/extractor/periscope.py | 12 +- yt_dlp/extractor/philharmoniedeparis.py | 7 +- yt_dlp/extractor/phoenix.py | 9 +- yt_dlp/extractor/photobucket.py | 6 +- yt_dlp/extractor/piapro.py | 15 +- yt_dlp/extractor/picarto.py | 14 +- yt_dlp/extractor/piksel.py | 8 +- yt_dlp/extractor/pinkbike.py | 8 +- yt_dlp/extractor/pinterest.py | 17 +- yt_dlp/extractor/pixivsketch.py | 4 +- yt_dlp/extractor/pladform.py | 12 +- yt_dlp/extractor/planetmarathi.py | 15 +- yt_dlp/extractor/platzi.py | 22 +- yt_dlp/extractor/playsuisse.py | 20 +- yt_dlp/extractor/playtvak.py | 24 +- yt_dlp/extractor/playwire.py | 2 +- yt_dlp/extractor/pluralsight.py | 69 +- yt_dlp/extractor/plutotv.py | 25 +- yt_dlp/extractor/podbayfm.py | 41 +- yt_dlp/extractor/podchaser.py | 14 +- yt_dlp/extractor/podomatic.py | 11 +- yt_dlp/extractor/pokemon.py | 14 +- yt_dlp/extractor/pokergo.py | 29 +- yt_dlp/extractor/polsatgo.py | 6 +- yt_dlp/extractor/polskieradio.py | 17 +- yt_dlp/extractor/popcorntimes.py | 5 +- yt_dlp/extractor/popcorntv.py | 2 +- yt_dlp/extractor/pornbox.py | 12 +- yt_dlp/extractor/pornflip.py | 2 +- yt_dlp/extractor/pornhub.py | 66 +- yt_dlp/extractor/pornotube.py | 11 +- yt_dlp/extractor/pornovoisines.py | 6 +- yt_dlp/extractor/pornoxo.py | 2 +- yt_dlp/extractor/pr0gramm.py | 4 +- yt_dlp/extractor/prankcast.py | 24 +- yt_dlp/extractor/premiershiprugby.py | 2 +- yt_dlp/extractor/presstv.py | 10 +- yt_dlp/extractor/projectveritas.py | 10 +- yt_dlp/extractor/prosiebensat1.py | 20 +- yt_dlp/extractor/prx.py | 68 +- yt_dlp/extractor/puhutv.py | 41 +- yt_dlp/extractor/puls4.py | 3 +- yt_dlp/extractor/pyvideo.py | 7 +- yt_dlp/extractor/qingting.py | 4 +- yt_dlp/extractor/qqmusic.py | 559 ++++++++++------ yt_dlp/extractor/r7.py | 6 +- yt_dlp/extractor/radiko.py | 8 +- yt_dlp/extractor/radiocanada.py | 8 +- yt_dlp/extractor/radiocomercial.py | 14 +- yt_dlp/extractor/radiode.py | 4 +- yt_dlp/extractor/radiofrance.py | 4 +- yt_dlp/extractor/radiojavan.py | 2 +- yt_dlp/extractor/radiokapital.py | 4 +- yt_dlp/extractor/radiozet.py | 2 +- yt_dlp/extractor/radlive.py | 10 +- yt_dlp/extractor/rai.py | 20 +- yt_dlp/extractor/raywenderlich.py | 16 +- yt_dlp/extractor/rbgtum.py | 10 +- yt_dlp/extractor/rcs.py | 30 +- yt_dlp/extractor/rcti.py | 30 +- yt_dlp/extractor/rds.py | 7 +- yt_dlp/extractor/redbee.py | 30 +- yt_dlp/extractor/redbulltv.py | 19 +- yt_dlp/extractor/redge.py | 4 +- yt_dlp/extractor/redgifs.py | 30 +- yt_dlp/extractor/redtube.py | 4 +- yt_dlp/extractor/rentv.py | 7 +- yt_dlp/extractor/restudy.py | 4 +- yt_dlp/extractor/reuters.py | 8 +- yt_dlp/extractor/reverbnation.py | 6 +- yt_dlp/extractor/ridehome.py | 4 +- yt_dlp/extractor/rinsefm.py | 14 +- yt_dlp/extractor/rmcdecouverte.py | 8 +- yt_dlp/extractor/rockstargames.py | 2 +- yt_dlp/extractor/rokfin.py | 18 +- yt_dlp/extractor/roosterteeth.py | 6 +- yt_dlp/extractor/rottentomatoes.py | 4 +- yt_dlp/extractor/rozhlas.py | 16 +- yt_dlp/extractor/rte.py | 2 +- yt_dlp/extractor/rtl2.py | 4 +- yt_dlp/extractor/rtlnl.py | 26 +- yt_dlp/extractor/rtnews.py | 60 +- yt_dlp/extractor/rtp.py | 2 +- yt_dlp/extractor/rtrfm.py | 4 +- yt_dlp/extractor/rts.py | 11 +- yt_dlp/extractor/rtvcplay.py | 6 +- yt_dlp/extractor/rtve.py | 20 +- yt_dlp/extractor/rtvs.py | 8 +- yt_dlp/extractor/rtvslo.py | 164 +++-- yt_dlp/extractor/rule34video.py | 8 +- yt_dlp/extractor/rumble.py | 26 +- yt_dlp/extractor/rutube.py | 15 +- yt_dlp/extractor/rutv.py | 10 +- yt_dlp/extractor/ruutu.py | 16 +- yt_dlp/extractor/ruv.py | 8 +- yt_dlp/extractor/s4c.py | 6 +- yt_dlp/extractor/safari.py | 29 +- yt_dlp/extractor/saitosan.py | 8 +- yt_dlp/extractor/samplefocus.py | 8 +- yt_dlp/extractor/sapo.py | 2 +- yt_dlp/extractor/sbscokr.py | 4 +- yt_dlp/extractor/screencast.py | 15 +- yt_dlp/extractor/screencastomatic.py | 2 +- yt_dlp/extractor/scrippsnetworks.py | 12 +- yt_dlp/extractor/scrolller.py | 14 +- yt_dlp/extractor/scte.py | 6 +- yt_dlp/extractor/senategov.py | 19 +- yt_dlp/extractor/sendtonews.py | 6 +- yt_dlp/extractor/servus.py | 2 +- yt_dlp/extractor/sevenplus.py | 7 +- yt_dlp/extractor/sexu.py | 2 +- yt_dlp/extractor/seznamzpravy.py | 12 +- yt_dlp/extractor/shahid.py | 18 +- yt_dlp/extractor/shemaroome.py | 21 +- yt_dlp/extractor/showroomlive.py | 9 +- yt_dlp/extractor/sibnet.py | 4 +- yt_dlp/extractor/simplecast.py | 6 +- yt_dlp/extractor/sina.py | 7 +- yt_dlp/extractor/sixplay.py | 11 +- yt_dlp/extractor/skeb.py | 16 +- yt_dlp/extractor/sky.py | 2 +- yt_dlp/extractor/skyit.py | 10 +- yt_dlp/extractor/skylinewebcams.py | 2 +- yt_dlp/extractor/skynewsarabia.py | 11 +- yt_dlp/extractor/skynewsau.py | 12 +- yt_dlp/extractor/slideshare.py | 8 +- yt_dlp/extractor/slideslive.py | 2 +- yt_dlp/extractor/slutload.py | 12 +- yt_dlp/extractor/snotr.py | 2 +- yt_dlp/extractor/sohu.py | 48 +- yt_dlp/extractor/sonyliv.py | 6 +- yt_dlp/extractor/soundcloud.py | 106 +-- yt_dlp/extractor/soundgasm.py | 4 +- yt_dlp/extractor/southpark.py | 4 +- yt_dlp/extractor/spankbang.py | 11 +- yt_dlp/extractor/spiegel.py | 4 +- yt_dlp/extractor/sport5.py | 6 +- yt_dlp/extractor/sportdeutschland.py | 12 +- yt_dlp/extractor/spotify.py | 6 +- yt_dlp/extractor/spreaker.py | 23 +- yt_dlp/extractor/springboardplatform.py | 5 +- yt_dlp/extractor/sproutvideo.py | 198 ++++++ yt_dlp/extractor/srgssr.py | 15 +- yt_dlp/extractor/srmediathek.py | 2 +- yt_dlp/extractor/stageplus.py | 2 +- yt_dlp/extractor/stanfordoc.py | 18 +- yt_dlp/extractor/startrek.py | 4 +- yt_dlp/extractor/startv.py | 31 +- yt_dlp/extractor/steam.py | 24 +- yt_dlp/extractor/stitcher.py | 5 +- yt_dlp/extractor/storyfire.py | 8 +- yt_dlp/extractor/streamable.py | 10 +- yt_dlp/extractor/streamcz.py | 14 +- yt_dlp/extractor/streetvoice.py | 8 +- yt_dlp/extractor/stretchinternet.py | 2 +- yt_dlp/extractor/stripchat.py | 2 +- yt_dlp/extractor/stv.py | 9 +- yt_dlp/extractor/substack.py | 8 +- yt_dlp/extractor/sunporno.py | 4 +- yt_dlp/extractor/sverigesradio.py | 2 +- yt_dlp/extractor/svt.py | 43 +- yt_dlp/extractor/swearnet.py | 8 +- yt_dlp/extractor/syfy.py | 4 +- yt_dlp/extractor/syvdk.py | 4 +- yt_dlp/extractor/tagesschau.py | 4 +- yt_dlp/extractor/taptap.py | 24 +- yt_dlp/extractor/tbs.py | 19 +- yt_dlp/extractor/tbsjp.py | 6 +- yt_dlp/extractor/teachable.py | 35 +- yt_dlp/extractor/teachertube.py | 12 +- yt_dlp/extractor/ted.py | 22 +- yt_dlp/extractor/tele13.py | 2 +- yt_dlp/extractor/telecaribe.py | 2 +- yt_dlp/extractor/telecinco.py | 2 +- yt_dlp/extractor/telegraaf.py | 8 +- yt_dlp/extractor/telegram.py | 2 +- yt_dlp/extractor/telemb.py | 6 +- yt_dlp/extractor/telemundo.py | 4 +- yt_dlp/extractor/telequebec.py | 7 +- yt_dlp/extractor/teletask.py | 8 +- yt_dlp/extractor/telewebion.py | 2 +- yt_dlp/extractor/tempo.py | 18 +- yt_dlp/extractor/tencent.py | 2 +- yt_dlp/extractor/tennistv.py | 14 +- yt_dlp/extractor/tenplay.py | 6 +- yt_dlp/extractor/testurl.py | 2 +- yt_dlp/extractor/tf1.py | 4 +- yt_dlp/extractor/tfo.py | 4 +- yt_dlp/extractor/theguardian.py | 31 +- yt_dlp/extractor/theholetv.py | 6 +- yt_dlp/extractor/theintercept.py | 7 +- yt_dlp/extractor/theplatform.py | 32 +- yt_dlp/extractor/thestar.py | 2 +- yt_dlp/extractor/theweatherchannel.py | 8 +- yt_dlp/extractor/thisamericanlife.py | 4 +- yt_dlp/extractor/thisvid.py | 4 +- yt_dlp/extractor/threeqsdn.py | 6 +- yt_dlp/extractor/threespeak.py | 28 +- yt_dlp/extractor/tiktok.py | 89 ++- yt_dlp/extractor/tmz.py | 6 +- yt_dlp/extractor/tnaflix.py | 13 +- yt_dlp/extractor/toggle.py | 24 +- yt_dlp/extractor/tonline.py | 4 +- yt_dlp/extractor/toongoggles.py | 2 +- yt_dlp/extractor/toutv.py | 2 +- yt_dlp/extractor/toypics.py | 8 +- yt_dlp/extractor/traileraddict.py | 4 +- yt_dlp/extractor/trovo.py | 4 +- yt_dlp/extractor/trtcocuk.py | 6 +- yt_dlp/extractor/trtworld.py | 10 +- yt_dlp/extractor/trueid.py | 6 +- yt_dlp/extractor/trutv.py | 2 +- yt_dlp/extractor/tube8.py | 10 +- yt_dlp/extractor/tubetugraz.py | 79 +-- yt_dlp/extractor/tubitv.py | 160 +++-- yt_dlp/extractor/tumblr.py | 14 +- yt_dlp/extractor/tunein.py | 4 +- yt_dlp/extractor/turner.py | 9 +- yt_dlp/extractor/tv2.py | 18 +- yt_dlp/extractor/tv24ua.py | 6 +- yt_dlp/extractor/tv2dk.py | 2 +- yt_dlp/extractor/tv2hu.py | 19 +- yt_dlp/extractor/tv4.py | 2 +- yt_dlp/extractor/tv5unis.py | 12 +- yt_dlp/extractor/tvanouvelles.py | 4 +- yt_dlp/extractor/tvc.py | 2 +- yt_dlp/extractor/tver.py | 2 +- yt_dlp/extractor/tvigle.py | 8 +- yt_dlp/extractor/tviplayer.py | 8 +- yt_dlp/extractor/tvn24.py | 4 +- yt_dlp/extractor/tvnoe.py | 4 +- yt_dlp/extractor/tvp.py | 13 +- yt_dlp/extractor/tvplay.py | 14 +- yt_dlp/extractor/tvplayer.py | 7 +- yt_dlp/extractor/tweakers.py | 4 +- yt_dlp/extractor/twentymin.py | 6 +- yt_dlp/extractor/twentythreevideo.py | 4 +- yt_dlp/extractor/twitcasting.py | 14 +- yt_dlp/extractor/twitch.py | 114 ++-- yt_dlp/extractor/twitter.py | 79 ++- yt_dlp/extractor/txxx.py | 38 +- yt_dlp/extractor/udemy.py | 44 +- yt_dlp/extractor/udn.py | 8 +- yt_dlp/extractor/uktvplay.py | 2 +- yt_dlp/extractor/umg.py | 4 +- yt_dlp/extractor/unistra.py | 8 +- yt_dlp/extractor/unity.py | 2 +- yt_dlp/extractor/uol.py | 16 +- yt_dlp/extractor/urort.py | 12 +- yt_dlp/extractor/urplay.py | 8 +- yt_dlp/extractor/usatoday.py | 7 +- yt_dlp/extractor/ustream.py | 33 +- yt_dlp/extractor/ustudio.py | 12 +- yt_dlp/extractor/utreon.py | 10 +- yt_dlp/extractor/veo.py | 6 +- yt_dlp/extractor/veoh.py | 16 +- yt_dlp/extractor/vesti.py | 4 +- yt_dlp/extractor/vevo.py | 43 +- yt_dlp/extractor/vgtv.py | 19 +- yt_dlp/extractor/vh1.py | 2 +- yt_dlp/extractor/vice.py | 16 +- yt_dlp/extractor/viddler.py | 6 +- yt_dlp/extractor/videa.py | 7 +- yt_dlp/extractor/videocampus_sachsen.py | 34 +- yt_dlp/extractor/videofyme.py | 2 +- yt_dlp/extractor/videoken.py | 2 +- yt_dlp/extractor/videomore.py | 13 +- yt_dlp/extractor/videopress.py | 6 +- yt_dlp/extractor/vidio.py | 22 +- yt_dlp/extractor/vidlii.py | 4 +- yt_dlp/extractor/vidly.py | 2 +- yt_dlp/extractor/viewlift.py | 29 +- yt_dlp/extractor/viidea.py | 23 +- yt_dlp/extractor/viki.py | 28 +- yt_dlp/extractor/vimeo.py | 86 +-- yt_dlp/extractor/vine.py | 15 +- yt_dlp/extractor/viously.py | 2 +- yt_dlp/extractor/viqeo.py | 2 +- yt_dlp/extractor/viu.py | 39 +- yt_dlp/extractor/vk.py | 10 +- yt_dlp/extractor/vodplatform.py | 2 +- yt_dlp/extractor/voicy.py | 19 +- yt_dlp/extractor/volejtv.py | 4 +- yt_dlp/extractor/voxmedia.py | 9 +- yt_dlp/extractor/vrt.py | 22 +- yt_dlp/extractor/vtm.py | 4 +- yt_dlp/extractor/vuclip.py | 10 +- yt_dlp/extractor/vvvvid.py | 22 +- yt_dlp/extractor/walla.py | 4 +- yt_dlp/extractor/washingtonpost.py | 6 +- yt_dlp/extractor/wat.py | 5 +- yt_dlp/extractor/wdr.py | 29 +- yt_dlp/extractor/webcamerapl.py | 4 +- yt_dlp/extractor/webcaster.py | 2 +- yt_dlp/extractor/webofstories.py | 18 +- yt_dlp/extractor/weibo.py | 11 +- yt_dlp/extractor/wevidi.py | 12 +- yt_dlp/extractor/whowatch.py | 21 +- yt_dlp/extractor/wikimedia.py | 4 +- yt_dlp/extractor/wimtv.py | 26 +- yt_dlp/extractor/wistia.py | 24 +- yt_dlp/extractor/wordpress.py | 14 +- yt_dlp/extractor/worldstarhiphop.py | 4 +- yt_dlp/extractor/wppilot.py | 4 +- yt_dlp/extractor/wsj.py | 7 +- yt_dlp/extractor/wwe.py | 7 +- yt_dlp/extractor/wykop.py | 2 +- yt_dlp/extractor/xanimu.py | 19 +- yt_dlp/extractor/xboxclips.py | 4 +- yt_dlp/extractor/xhamster.py | 27 +- yt_dlp/extractor/xiaohongshu.py | 4 +- yt_dlp/extractor/ximalaya.py | 30 +- yt_dlp/extractor/xinpianchang.py | 4 +- yt_dlp/extractor/xminus.py | 2 +- yt_dlp/extractor/xnxx.py | 2 +- yt_dlp/extractor/xstream.py | 5 +- yt_dlp/extractor/xvideos.py | 48 +- yt_dlp/extractor/xxxymovies.py | 2 +- yt_dlp/extractor/yahoo.py | 22 +- yt_dlp/extractor/yandexdisk.py | 8 +- yt_dlp/extractor/yandexmusic.py | 73 +-- yt_dlp/extractor/yandexvideo.py | 12 +- yt_dlp/extractor/yapfiles.py | 6 +- yt_dlp/extractor/yappy.py | 12 +- yt_dlp/extractor/yle_areena.py | 10 +- yt_dlp/extractor/youjizz.py | 2 +- yt_dlp/extractor/youku.py | 4 +- yt_dlp/extractor/younow.py | 45 +- yt_dlp/extractor/youporn.py | 6 +- yt_dlp/extractor/youtube.py | 403 ++++++------ yt_dlp/extractor/zaiko.py | 2 +- yt_dlp/extractor/zapiks.py | 4 +- yt_dlp/extractor/zattoo.py | 49 +- yt_dlp/extractor/zdf.py | 31 +- yt_dlp/extractor/zee5.py | 33 +- yt_dlp/extractor/zeenews.py | 6 +- yt_dlp/extractor/zenporn.py | 8 +- yt_dlp/extractor/zetland.py | 4 +- yt_dlp/extractor/zhihu.py | 2 +- yt_dlp/extractor/zingmp3.py | 12 +- yt_dlp/extractor/zoom.py | 10 +- yt_dlp/extractor/zype.py | 6 +- yt_dlp/jsinterp.py | 24 +- yt_dlp/networking/__init__.py | 2 +- yt_dlp/networking/_curlcffi.py | 2 +- yt_dlp/networking/_helper.py | 4 +- yt_dlp/networking/_requests.py | 33 +- yt_dlp/networking/_urllib.py | 8 +- yt_dlp/networking/_websockets.py | 8 +- yt_dlp/networking/common.py | 28 +- yt_dlp/networking/exceptions.py | 2 +- yt_dlp/networking/impersonate.py | 6 +- yt_dlp/options.py | 48 +- yt_dlp/postprocessor/__init__.py | 2 +- yt_dlp/postprocessor/common.py | 6 +- yt_dlp/postprocessor/embedthumbnail.py | 37 +- yt_dlp/postprocessor/exec.py | 5 +- yt_dlp/postprocessor/ffmpeg.py | 60 +- yt_dlp/postprocessor/modify_chapters.py | 2 +- yt_dlp/postprocessor/movefilesafterdownload.py | 7 +- yt_dlp/postprocessor/sponskrub.py | 4 +- yt_dlp/postprocessor/sponsorblock.py | 10 +- yt_dlp/socks.py | 8 +- yt_dlp/update.py | 10 +- yt_dlp/utils/_legacy.py | 10 +- yt_dlp/utils/_utils.py | 311 +++++---- yt_dlp/utils/networking.py | 4 +- yt_dlp/version.py | 6 +- yt_dlp/webvtt.py | 17 +- 889 files changed, 9055 insertions(+), 8001 deletions(-) create mode 100644 yt_dlp/extractor/graspop.py create mode 100644 yt_dlp/extractor/laracasts.py delete mode 100644 yt_dlp/extractor/microsoftvirtualacademy.py create mode 100644 yt_dlp/extractor/sproutvideo.py (limited to 'yt_dlp') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c6f695..e56c3ed 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -4,6 +4,7 @@ import copy import datetime as dt import errno import fileinput +import functools import http.cookiejar import io import itertools @@ -24,7 +25,7 @@ import traceback import unicodedata from .cache import Cache -from .compat import functools, urllib # isort: split +from .compat import urllib # isort: split from .compat import compat_os_name, urllib_req_to_req from .cookies import LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name @@ -109,7 +110,6 @@ from .utils import ( determine_protocol, encode_compat_str, encodeFilename, - error_to_compat_str, escapeHTML, expand_path, extract_basic_auth, @@ -159,7 +159,7 @@ from .utils import ( write_json_file, write_string, ) -from .utils._utils import _YDLLogger +from .utils._utils import _UnsafeExtensionError, _YDLLogger from .utils.networking import ( HTTPHeaderDict, clean_headers, @@ -172,6 +172,20 @@ if compat_os_name == 'nt': import ctypes +def _catch_unsafe_extension_error(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + try: + return func(self, *args, **kwargs) + except _UnsafeExtensionError as error: + self.report_error( + f'The extracted extension ({error.extension!r}) is unusual ' + 'and will be skipped for safety reasons. ' + f'If you believe this is an error{bug_reports_message(",")}') + + return wrapper + + class YoutubeDL: """YoutubeDL class. @@ -454,8 +468,9 @@ class YoutubeDL: Set the value to 'native' to use the native downloader compat_opts: Compatibility options. See "Differences in default behavior". The following options do not work when used through the API: - filename, abort-on-error, multistreams, no-live-chat, format-sort - no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. + filename, abort-on-error, multistreams, no-live-chat, + format-sort, no-clean-infojson, no-playlist-metafiles, + no-keep-subs, no-attach-info-json, allow-unsafe-ext. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', @@ -582,8 +597,9 @@ class YoutubeDL: 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', - 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', - 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url', + 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', + 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', } _deprecated_multivalue_fields = { 'album_artist': 'album_artists', @@ -594,7 +610,7 @@ class YoutubeDL: } _format_selection_exts = { 'audio': set(MEDIA_EXTENSIONS.common_audio), - 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), + 'video': {*MEDIA_EXTENSIONS.common_video, '3gp'}, 'storyboards': set(MEDIA_EXTENSIONS.storyboards), } @@ -628,7 +644,7 @@ class YoutubeDL: error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, console=None if compat_os_name == 'nt' else next( - filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) + filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) try: @@ -679,9 +695,9 @@ class YoutubeDL: width_args = [] if width is None else ['-w', str(width)] sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} try: - self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + self._output_process = Popen(['bidiv', *width_args], **sp_kwargs) except OSError: - self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: @@ -822,8 +838,7 @@ class YoutubeDL: ) self.report_warning( 'Long argument string detected. ' - 'Use -- to separate parameters and URLs, like this:\n%s' % - shell_quote(correct_argv)) + f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}') def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -922,7 +937,7 @@ class YoutubeDL: if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): return self._write_string( - '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')), self._out_files.screen, only_once=only_once) def to_stderr(self, message, only_once=False): @@ -1045,10 +1060,10 @@ class YoutubeDL: return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs) def report_warning(self, message, only_once=False): - ''' + """ Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored - ''' + """ if self.params.get('logger') is not None: self.params['logger'].warning(message) else: @@ -1066,14 +1081,14 @@ class YoutubeDL: self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True) def report_error(self, message, *args, **kwargs): - ''' + """ Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. - ''' + """ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) def write_debug(self, message, only_once=False): - '''Log debug message or Print message to stderr''' + """Log debug message or Print message to stderr""" if not self.params.get('verbose', False): return message = f'[debug] {message}' @@ -1085,14 +1100,14 @@ class YoutubeDL: def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: - self.to_screen('[download] %s has already been downloaded' % file_name) + self.to_screen(f'[download] {file_name} has already been downloaded') except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') def report_file_delete(self, file_name): """Report that existing file will be deleted.""" try: - self.to_screen('Deleting existing file %s' % file_name) + self.to_screen(f'Deleting existing file {file_name}') except UnicodeEncodeError: self.to_screen('Deleting existing file') @@ -1147,7 +1162,7 @@ class YoutubeDL: @staticmethod def escape_outtmpl(outtmpl): - ''' Escape any remaining strings like %s, %abc% etc. ''' + """ Escape any remaining strings like %s, %abc% etc. """ return re.sub( STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), @@ -1155,7 +1170,7 @@ class YoutubeDL: @classmethod def validate_outtmpl(cls, outtmpl): - ''' @return None or Exception object ''' + """ @return None or Exception object """ outtmpl = re.sub( STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'), lambda mobj: f'{mobj.group(0)[:-1]}s', @@ -1208,13 +1223,13 @@ class YoutubeDL: } # Field is of the form key1.key2... # where keys (except first) can be string, int, slice or "{field, ...}" - FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} - FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { + FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031 + FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031 'inner': FIELD_INNER_RE, - 'field': rf'\w*(?:\.{FIELD_INNER_RE})*' + 'field': rf'\w*(?:\.{FIELD_INNER_RE})*', } MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' - MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) + MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys()))) INTERNAL_FORMAT_RE = re.compile(rf'''(?xs) (?P-)? (?P{FIELD_RE}) @@ -1337,7 +1352,7 @@ class YoutubeDL: value, default = None, na fmt = outer_mobj.group('format') - if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int): + if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int): fmt = f'0{field_size_compat_map[last_field]:d}d' flags = outer_mobj.group('conversion') or '' @@ -1362,7 +1377,7 @@ class YoutubeDL: elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD - 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), + 'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt elif fmt[-1] == 'D': # decimal suffix num_fmt, fmt = fmt[:-1].replace('#', ''), 's' @@ -1390,7 +1405,7 @@ class YoutubeDL: if fmt[-1] in 'csra': value = sanitizer(last_field, value) - key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) + key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix')) @@ -1400,6 +1415,7 @@ class YoutubeDL: outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) return self.escape_outtmpl(outtmpl) % info_dict + @_catch_unsafe_extension_error def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None): assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive' if outtmpl is None: @@ -1479,9 +1495,9 @@ class YoutubeDL: date = info_dict.get('upload_date') if date is not None: - dateRange = self.params.get('daterange', DateRange()) - if date not in dateRange: - return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}' + date_range = self.params.get('daterange', DateRange()) + if date not in date_range: + return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}' view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') @@ -1491,7 +1507,7 @@ class YoutubeDL: if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): - return 'Skipping "%s" because it is age restricted' % video_title + return f'Skipping "{video_title}" because it is age restricted' match_filter = self.params.get('match_filter') if match_filter is None: @@ -1544,7 +1560,7 @@ class YoutubeDL: @staticmethod def add_extra_info(info_dict, extra_info): - '''Set the keys from extra_info in info dict if they are missing''' + """Set the keys from extra_info in info dict if they are missing""" for key, value in extra_info.items(): info_dict.setdefault(key, value) @@ -1590,7 +1606,7 @@ class YoutubeDL: self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: ' 'has already been recorded in the archive') if self.params.get('break_on_existing', False): - raise ExistingVideoReached() + raise ExistingVideoReached break return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process) else: @@ -1616,8 +1632,8 @@ class YoutubeDL: except GeoRestrictedError as e: msg = e.msg if e.countries: - msg += '\nThis video is available in %s.' % ', '.join( - map(ISO3166Utils.short2full, e.countries)) + msg += '\nThis video is available in {}.'.format(', '.join( + map(ISO3166Utils.short2full, e.countries))) msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' self.report_error(msg) except ExtractorError as e: # An error we somewhat expected @@ -1826,8 +1842,8 @@ class YoutubeDL: if isinstance(additional_urls, str): additional_urls = [additional_urls] self.to_screen( - '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) - self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) + '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls))) + self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls))) ie_result['additional_entries'] = [ self.extract_info( url, download, extra_info=extra_info, @@ -1879,8 +1895,8 @@ class YoutubeDL: webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url if webpage_url and webpage_url in self._playlist_urls: self.to_screen( - '[download] Skipping already downloaded playlist: %s' - % ie_result.get('title') or ie_result.get('id')) + '[download] Skipping already downloaded playlist: {}'.format( + ie_result.get('title')) or ie_result.get('id')) return self._playlist_level += 1 @@ -1895,8 +1911,8 @@ class YoutubeDL: self._playlist_urls.clear() elif result_type == 'compat_list': self.report_warning( - 'Extractor %s returned a compat_list result. ' - 'It needs to be updated.' % ie_result.get('extractor')) + 'Extractor {} returned a compat_list result. ' + 'It needs to be updated.'.format(ie_result.get('extractor'))) def _fixup(r): self.add_extra_info(r, { @@ -1913,7 +1929,7 @@ class YoutubeDL: ] return ie_result else: - raise Exception('Invalid result type: %s' % result_type) + raise Exception(f'Invalid result type: {result_type}') def _ensure_dir_exists(self, path): return make_dir(path, self.report_error) @@ -1927,6 +1943,8 @@ class YoutubeDL: 'playlist_title': ie_result.get('title'), 'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader_id': ie_result.get('uploader_id'), + 'playlist_channel': ie_result.get('channel'), + 'playlist_channel_id': ie_result.get('channel_id'), **kwargs, } if strict: @@ -2029,8 +2047,9 @@ class YoutubeDL: resolved_entries[i] = (playlist_index, NO_DEFAULT) continue - self.to_screen('[download] Downloading item %s of %s' % ( - self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) + self.to_screen( + f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} ' + f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}') entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, @@ -2080,9 +2099,9 @@ class YoutubeDL: } operator_rex = re.compile(r'''(?x)\s* (?P[\w.-]+)\s* - (?P%s)(?P\s*\?)?\s* + (?P{})(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s* - ''' % '|'.join(map(re.escape, OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, OPERATORS.keys())))) m = operator_rex.fullmatch(filter_spec) if m: try: @@ -2093,7 +2112,7 @@ class YoutubeDL: comparison_value = parse_filesize(m.group('value') + 'B') if comparison_value is None: raise ValueError( - 'Invalid value %r in format specification %r' % ( + 'Invalid value {!r} in format specification {!r}'.format( m.group('value'), filter_spec)) op = OPERATORS[m.group('op')] @@ -2103,15 +2122,15 @@ class YoutubeDL: '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, - '~=': lambda attr, value: value.search(attr) is not None + '~=': lambda attr, value: value.search(attr) is not None, } str_operator_rex = re.compile(r'''(?x)\s* (?P[a-zA-Z0-9._-]+)\s* - (?P!\s*)?(?P%s)\s*(?P\?\s*)? + (?P!\s*)?(?P{})\s*(?P\?\s*)? (?P["'])? (?P(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+)) (?(quote)(?P=quote))\s* - ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, STR_OPERATORS.keys())))) m = str_operator_rex.fullmatch(filter_spec) if m: if m.group('op') == '~=': @@ -2125,7 +2144,7 @@ class YoutubeDL: op = str_op if not m: - raise SyntaxError('Invalid filter specification %r' % filter_spec) + raise SyntaxError(f'Invalid filter specification {filter_spec!r}') def _filter(f): actual_value = f.get(m.group('key')) @@ -2141,7 +2160,7 @@ class YoutubeDL: if working: yield f continue - self.to_screen('[info] Testing format %s' % f['format_id']) + self.to_screen('[info] Testing format {}'.format(f['format_id'])) path = self.get_output_path('temp') if not self._ensure_dir_exists(f'{path}/'): continue @@ -2149,19 +2168,19 @@ class YoutubeDL: temp_file.close() try: success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, OSError, ValueError) + network_exceptions: + except (DownloadError, OSError, ValueError, *network_exceptions): success = False finally: if os.path.exists(temp_file.name): try: os.remove(temp_file.name) except OSError: - self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: yield f else: - self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) def _select_formats(self, formats, selector): return list(selector({ @@ -2214,8 +2233,8 @@ class YoutubeDL: def _parse_filter(tokens): filter_parts = [] - for type, string_, start, _, _ in tokens: - if type == tokenize.OP and string_ == ']': + for type_, string_, _start, _, _ in tokens: + if type_ == tokenize.OP and string_ == ']': return ''.join(filter_parts) else: filter_parts.append(string_) @@ -2225,23 +2244,23 @@ class YoutubeDL: # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' ALLOWED_OPS = ('/', '+', ',', '(', ')') last_string, last_start, last_end, last_line = None, None, None, None - for type, string_, start, end, line in tokens: - if type == tokenize.OP and string_ == '[': + for type_, string_, start, end, line in tokens: + if type_ == tokenize.OP and string_ == '[': if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line + yield type_, string_, start, end, line # everything inside brackets will be handled by _parse_filter - for type, string_, start, end, line in tokens: - yield type, string_, start, end, line - if type == tokenize.OP and string_ == ']': + for type_, string_, start, end, line in tokens: + yield type_, string_, start, end, line + if type_ == tokenize.OP and string_ == ']': break - elif type == tokenize.OP and string_ in ALLOWED_OPS: + elif type_ == tokenize.OP and string_ in ALLOWED_OPS: if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line - elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + yield type_, string_, start, end, line + elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: if not last_string: last_string = string_ last_start = start @@ -2254,13 +2273,13 @@ class YoutubeDL: def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None - for type, string_, start, _, _ in tokens: + for type_, string_, start, _, _ in tokens: # ENCODING is only defined in Python 3.x - if type == getattr(tokenize, 'ENCODING', None): + if type_ == getattr(tokenize, 'ENCODING', None): continue - elif type in [tokenize.NAME, tokenize.NUMBER]: + elif type_ in [tokenize.NAME, tokenize.NUMBER]: current_selector = FormatSelector(SINGLE, string_, []) - elif type == tokenize.OP: + elif type_ == tokenize.OP: if string_ == ')': if not inside_group: # ')' will be handled by the parentheses group @@ -2303,7 +2322,7 @@ class YoutubeDL: current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) else: raise syntax_error(f'Operator not recognized: "{string_}"', start) - elif type == tokenize.ENDMARKER: + elif type_ == tokenize.ENDMARKER: break if current_selector: selectors.append(current_selector) @@ -2378,7 +2397,7 @@ class YoutubeDL: 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), 'asr': the_only_audio.get('asr'), - 'audio_channels': the_only_audio.get('audio_channels') + 'audio_channels': the_only_audio.get('audio_channels'), }) return new_dict @@ -2459,9 +2478,9 @@ class YoutubeDL: format_fallback = not format_type and not format_modified # for b, w _filter_f = ( - (lambda f: f.get('%scodec' % format_type) != 'none') + (lambda f: f.get(f'{format_type}codec') != 'none') if format_type and format_modified # bv*, ba*, wv*, wa* - else (lambda f: f.get('%scodec' % not_format_type) == 'none') + else (lambda f: f.get(f'{not_format_type}codec') == 'none') if format_type # bv, ba, wv, wa else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') if not format_modified # b, w @@ -2529,7 +2548,7 @@ class YoutubeDL: def __next__(self): if self.counter >= len(self.tokens): - raise StopIteration() + raise StopIteration value = self.tokens[self.counter] self.counter += 1 return value @@ -2612,7 +2631,7 @@ class YoutubeDL: self._sort_thumbnails(thumbnails) for i, t in enumerate(thumbnails): if t.get('id') is None: - t['id'] = '%d' % i + t['id'] = str(i) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) @@ -2673,8 +2692,8 @@ class YoutubeDL: # Auto generate title fields corresponding to the *_number fields when missing # in order to always have clean titles. This is very common for TV series. for field in ('chapter', 'season', 'episode'): - if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): - info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number']) for old_key, new_key in self._deprecated_multivalue_fields.items(): if new_key in info_dict and old_key in info_dict: @@ -2706,8 +2725,8 @@ class YoutubeDL: def report_force_conversion(field, field_not, conversion): self.report_warning( - '"%s" field is not %s - forcing %s conversion, there is an error in extractor' - % (field, field_not, conversion)) + f'"{field}" field is not {field_not} - forcing {conversion} conversion, ' + 'there is an error in extractor') def sanitize_string_field(info, string_field): field = info.get(string_field) @@ -2824,28 +2843,28 @@ class YoutubeDL: if not formats: self.raise_no_formats(info_dict) - for format in formats: - sanitize_string_field(format, 'format_id') - sanitize_numeric_fields(format) - format['url'] = sanitize_url(format['url']) - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() - if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): - if format.get('acodec') is None: - format['acodec'] = format['ext'] - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - if format.get('resolution') is None: - format['resolution'] = self.format_resolution(format, default=None) - if format.get('dynamic_range') is None and format.get('vcodec') != 'none': - format['dynamic_range'] = 'SDR' - if format.get('aspect_ratio') is None: - format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + for fmt in formats: + sanitize_string_field(fmt, 'format_id') + sanitize_numeric_fields(fmt) + fmt['url'] = sanitize_url(fmt['url']) + if fmt.get('ext') is None: + fmt['ext'] = determine_ext(fmt['url']).lower() + if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): + if fmt.get('acodec') is None: + fmt['acodec'] = fmt['ext'] + if fmt.get('protocol') is None: + fmt['protocol'] = determine_protocol(fmt) + if fmt.get('resolution') is None: + fmt['resolution'] = self.format_resolution(fmt, default=None) + if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': + fmt['dynamic_range'] = 'SDR' + if fmt.get('aspect_ratio') is None: + fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2)) # For fragmented formats, "tbr" is often max bitrate and not average - if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url')) - and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration')) - format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True) + if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url')) + and not fmt.get('filesize') and not fmt.get('filesize_approx')): + fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration')) + fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True) # Safeguard against old/insecure infojson when using --load-info-json if info_dict.get('http_headers'): @@ -2858,36 +2877,36 @@ class YoutubeDL: self.sort_formats({ 'formats': formats, - '_format_sort_fields': info_dict.get('_format_sort_fields') + '_format_sort_fields': info_dict.get('_format_sort_fields'), }) # Sanitize and group by format_id formats_dict = {} - for i, format in enumerate(formats): - if not format.get('format_id'): - format['format_id'] = str(i) + for i, fmt in enumerate(formats): + if not fmt.get('format_id'): + fmt['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression - format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - formats_dict.setdefault(format['format_id'], []).append(format) + fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id']) + formats_dict.setdefault(fmt['format_id'], []).append(fmt) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambigious_id = len(ambiguous_formats) > 1 - for i, format in enumerate(ambiguous_formats): + for i, fmt in enumerate(ambiguous_formats): if ambigious_id: - format['format_id'] = '%s-%d' % (format_id, i) + fmt['format_id'] = f'{format_id}-{i}' # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 - if format['format_id'] != format['ext'] and format['format_id'] in common_exts: - format['format_id'] = 'f%s' % format['format_id'] - - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=format_field(format, 'format_note', ' (%s)'), + if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts: + fmt['format_id'] = 'f{}'.format(fmt['format_id']) + + if fmt.get('format') is None: + fmt['format'] = '{id} - {res}{note}'.format( + id=fmt['format_id'], + res=self.format_resolution(fmt), + note=format_field(fmt, 'format_note', ' (%s)'), ) if self.params.get('check_formats') is True: @@ -3009,7 +3028,7 @@ class YoutubeDL: info_dict['requested_downloads'] = downloaded_formats info_dict = self.run_all_pps('after_video', info_dict) if max_downloads_reached: - raise MaxDownloadsReached() + raise MaxDownloadsReached # We update the info dict with the selected best quality format (backwards compatibility) info_dict.update(best_format) @@ -3070,8 +3089,8 @@ class YoutubeDL: else: f = formats[-1] self.report_warning( - 'No subtitle format found matching "%s" for language %s, ' - 'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext'])) + 'No subtitle format found matching "{}" for language {}, ' + 'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext'])) subs[lang] = f return subs @@ -3189,6 +3208,7 @@ class YoutubeDL: os.remove(file) return None + @_catch_unsafe_extension_error def process_info(self, info_dict): """Process a single resolved IE result. (Modifies it in-place)""" @@ -3226,7 +3246,7 @@ class YoutubeDL: def check_max_downloads(): if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'): - raise MaxDownloadsReached() + raise MaxDownloadsReached if self.params.get('simulate'): info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') @@ -3400,7 +3420,7 @@ class YoutubeDL: for f in info_dict['requested_formats'] if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), - 'f%s' % f['format_id'], info_dict['ext']) + 'f{}'.format(f['format_id']), info_dict['ext']) downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) @@ -3433,7 +3453,7 @@ class YoutubeDL: if temp_filename != '-': fname = prepend_extension( correct_ext(temp_filename, new_info['ext']), - 'f%s' % f['format_id'], new_info['ext']) + 'f{}'.format(f['format_id']), new_info['ext']) if not self._ensure_dir_exists(fname): return f['filepath'] = fname @@ -3465,11 +3485,11 @@ class YoutubeDL: info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) except network_exceptions as err: - self.report_error('unable to download video data: %s' % error_to_compat_str(err)) + self.report_error(f'unable to download video data: {err}') return except OSError as err: raise UnavailableVideoError(err) - except (ContentTooShortError, ) as err: + except ContentTooShortError as err: self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return @@ -3536,13 +3556,13 @@ class YoutubeDL: try: replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) except PostProcessingError as err: - self.report_error('Postprocessing: %s' % str(err)) + self.report_error(f'Postprocessing: {err}') return try: for ph in self._post_hooks: ph(info_dict['filepath']) except Exception as err: - self.report_error('post hooks: %s' % str(err)) + self.report_error(f'post hooks: {err}') return info_dict['__write_download_archive'] = True @@ -3609,7 +3629,7 @@ class YoutubeDL: @staticmethod def sanitize_info(info_dict, remove_private_keys=False): - ''' Sanitize the infodict for converting to json ''' + """ Sanitize the infodict for converting to json """ if info_dict is None: return info_dict info_dict.setdefault('epoch', int(time.time())) @@ -3644,7 +3664,7 @@ class YoutubeDL: @staticmethod def filter_requested_info(info_dict, actually_filter=True): - ''' Alias of sanitize_info for backward compatibility ''' + """ Alias of sanitize_info for backward compatibility """ return YoutubeDL.sanitize_info(info_dict, actually_filter) def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): @@ -3666,7 +3686,7 @@ class YoutubeDL: actual_post_extract(video_dict or {}) return - post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) + post_extractor = info_dict.pop('__post_extractor', None) or dict info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) @@ -3771,7 +3791,7 @@ class YoutubeDL: if format.get('width') and format.get('height'): return '%dx%d' % (format['width'], format['height']) elif format.get('height'): - return '%sp' % format['height'] + return '{}p'.format(format['height']) elif format.get('width'): return '%dx?' % format['width'] return default @@ -3788,7 +3808,7 @@ class YoutubeDL: if fdict.get('language'): if res: res += ' ' - res += '[%s]' % fdict['language'] + res += '[{}]'.format(fdict['language']) if fdict.get('format_note') is not None: if res: res += ' ' @@ -3800,7 +3820,7 @@ class YoutubeDL: if fdict.get('container') is not None: if res: res += ', ' - res += '%s container' % fdict['container'] + res += '{} container'.format(fdict['container']) if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): if res: @@ -3815,7 +3835,7 @@ class YoutubeDL: if fdict.get('fps') is not None: if res: res += ', ' - res += '%sfps' % fdict['fps'] + res += '{}fps'.format(fdict['fps']) if fdict.get('acodec') is not None: if res: res += ', ' @@ -3858,7 +3878,7 @@ class YoutubeDL: format_field(f, 'format_id'), format_field(f, 'ext'), self.format_resolution(f), - self._format_note(f) + self._format_note(f), ] for f in formats if (f.get('preference') or 0) >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) @@ -3964,11 +3984,11 @@ class YoutubeDL: from .extractor.extractors import _LAZY_LOADER from .extractor.extractors import ( _PLUGIN_CLASSES as plugin_ies, - _PLUGIN_OVERRIDES as plugin_ie_overrides + _PLUGIN_OVERRIDES as plugin_ie_overrides, ) def get_encoding(stream): - ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) + ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})')) additional_info = [] if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') @@ -3979,13 +3999,13 @@ class YoutubeDL: ret = f'{ret} ({",".join(additional_info)})' return ret - encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % ( + encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format( locale.getpreferredencoding(), sys.getfilesystemencoding(), self.get_encoding(), ', '.join( f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ - if stream is not None and key != 'console') + if stream is not None and key != 'console'), ) logger = self.params.get('logger') @@ -4017,7 +4037,7 @@ class YoutubeDL: else: write_debug('Lazy loading extractors is disabled') if self.params['compat_opts']: - write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) + write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts']))) if current_git_head(): write_debug(f'Git HEAD: {current_git_head()}') @@ -4026,14 +4046,14 @@ class YoutubeDL: exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: - exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) + exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features))) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - write_debug('exe versions: %s' % exe_str) + write_debug(f'exe versions: {exe_str}') from .compat.compat_utils import get_package_info from .dependencies import available_dependencies @@ -4045,7 +4065,7 @@ class YoutubeDL: write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): - display_list = ['%s%s' % ( + display_list = ['{}{}'.format( klass.__name__, '' if klass.__name__ == name else f' as {name}') for name, klass in plugins.items()] if plugin_type == 'Extractor': @@ -4062,14 +4082,13 @@ class YoutubeDL: # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() - write_debug('Public IP address: %s' % ipaddr) + write_debug(f'Public IP address: {ipaddr}') latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( - 'You are using an outdated version (newest version: %s)! ' - 'See https://yt-dl.org/update if you need help updating.' % - latest_version) + f'You are using an outdated version (newest version: {latest_version})! ' + 'See https://yt-dl.org/update if you need help updating.') @functools.cached_property def proxies(self): @@ -4103,7 +4122,7 @@ class YoutubeDL: return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) def _get_available_impersonate_targets(self): - # todo(future): make available as public API + # TODO(future): make available as public API return [ (target, rh.RH_NAME) for rh in self._request_director.handlers.values() @@ -4112,7 +4131,7 @@ class YoutubeDL: ] def _impersonate_target_available(self, target): - # todo(future): make available as public API + # TODO(future): make available as public API return any( rh.is_supported_target(target) for rh in self._request_director.handlers.values() @@ -4238,7 +4257,7 @@ class YoutubeDL: return encoding def _write_info_json(self, label, ie_result, infofn, overwrite=None): - ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error ''' + """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """ if overwrite is None: overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): @@ -4261,7 +4280,7 @@ class YoutubeDL: return None def _write_description(self, label, ie_result, descfn): - ''' Write description and returns True = written, False = skip, None = error ''' + """ Write description and returns True = written, False = skip, None = error """ if not self.params.get('writedescription'): return False elif not descfn: @@ -4285,7 +4304,7 @@ class YoutubeDL: return True def _write_subtitles(self, info_dict, filename): - ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error""" ret = [] subtitles = info_dict.get('requested_subtitles') if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): @@ -4331,7 +4350,7 @@ class YoutubeDL: self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) - except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err: msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' if not self.params.get('ignoreerrors'): @@ -4341,7 +4360,7 @@ class YoutubeDL: return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): - ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error ''' + """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """ write_all = self.params.get('write_all_thumbnails', False) thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): @@ -4368,8 +4387,8 @@ class YoutubeDL: existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) if existing_thumb: - self.to_screen('[info] %s is already present' % ( - thumb_display_id if multiple else f'{label} thumbnail').capitalize()) + self.to_screen('[info] {} is already present'.format(( + thumb_display_id if multiple else f'{label} thumbnail').capitalize())) t['filepath'] = existing_thumb ret.append((existing_thumb, thumb_filename_final)) else: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3d606bc..f88f15d 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -14,7 +14,7 @@ import os import re import traceback -from .compat import compat_os_name, compat_shlex_quote +from .compat import compat_os_name from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .downloader.external import get_external_downloader from .extractor import list_extractor_classes @@ -58,11 +58,13 @@ from .utils import ( read_stdin, render_table, setproctitle, + shell_quote, traverse_obj, variadic, write_string, ) from .utils.networking import std_headers +from .utils._utils import _UnsafeExtensionError from .YoutubeDL import YoutubeDL _IN_CLI = False @@ -115,9 +117,9 @@ def print_extractor_information(opts, urls): ie.description(markdown=False, search_examples=_SEARCHES) for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) elif opts.ap_list_mso: - out = 'Supported TV Providers:\n%s\n' % render_table( + out = 'Supported TV Providers:\n{}\n'.format(render_table( ['mso', 'mso name'], - [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) + [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])) else: return False write_string(out, out=sys.stdout) @@ -129,7 +131,7 @@ def set_compat_opts(opts): if name not in opts.compat_opts: return False opts.compat_opts.discard(name) - opts.compat_opts.update(['*%s' % name]) + opts.compat_opts.update([f'*{name}']) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): @@ -222,7 +224,7 @@ def validate_options(opts): validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') if opts.wait_for_video is not None: - min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None]) validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), 'time range to wait for video', opts.wait_for_video) validate_minmax(min_wait, max_wait, 'time range to wait for video') @@ -264,9 +266,9 @@ def validate_options(opts): # Retry sleep function def parse_sleep_func(expr): NUMBER_RE = r'\d+(?:\.\d+)?' - op, start, limit, step, *_ = tuple(re.fullmatch( + op, start, limit, step, *_ = (*tuple(re.fullmatch( rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', - expr.strip()).groups()) + (None, None) + expr.strip()).groups()), None, None) if op == 'exp': return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) @@ -396,13 +398,13 @@ def validate_options(opts): # MetadataParser def metadataparser_actions(f): if isinstance(f, str): - cmd = '--parse-metadata %s' % compat_shlex_quote(f) + cmd = f'--parse-metadata {shell_quote(f)}' try: actions = [MetadataFromFieldPP.to_action(f)] except Exception as err: raise ValueError(f'{cmd} is invalid; {err}') else: - cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) + cmd = f'--replace-in-metadata {shell_quote(f)}' actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) for action in actions: @@ -413,7 +415,7 @@ def validate_options(opts): yield action if opts.metafromtitle is not None: - opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}') opts.parse_metadata = { k: list(itertools.chain(*map(metadataparser_actions, v))) for k, v in opts.parse_metadata.items() @@ -592,6 +594,13 @@ def validate_options(opts): if opts.ap_username is not None and opts.ap_password is None: opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ') + # compat option changes global state destructively; only allow from cli + if 'allow-unsafe-ext' in opts.compat_opts: + warnings.append( + 'Using allow-unsafe-ext opens you up to potential attacks. ' + 'Use with great care!') + _UnsafeExtensionError.sanitize_extension = lambda x: x + return warnings, deprecation_warnings @@ -602,7 +611,7 @@ def get_postprocessors(opts): yield { 'key': 'MetadataParser', 'actions': actions, - 'when': when + 'when': when, } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -610,19 +619,19 @@ def get_postprocessors(opts): 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, - 'when': 'after_filter' + 'when': 'after_filter', } if opts.convertsubtitles: yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.convertthumbnails: yield { 'key': 'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.extractaudio: yield { @@ -647,7 +656,7 @@ def get_postprocessors(opts): yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding - 'already_have_subtitle': opts.writesubtitles and keep_subs + 'already_have_subtitle': opts.writesubtitles and keep_subs, } if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True @@ -660,7 +669,7 @@ def get_postprocessors(opts): 'remove_sponsor_segments': opts.sponsorblock_remove, 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, - 'force_keyframes': opts.force_keyframes_at_cuts + 'force_keyframes': opts.force_keyframes_at_cuts, } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support @@ -694,7 +703,7 @@ def get_postprocessors(opts): yield { 'key': 'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding - 'already_have_thumbnail': opts.writethumbnail + 'already_have_thumbnail': opts.writethumbnail, } if not opts.writethumbnail: opts.writethumbnail = True @@ -741,7 +750,7 @@ def parse_options(argv=None): print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', - 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' + 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl', )) if opts.quiet is None: opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) @@ -1002,7 +1011,7 @@ def _real_main(argv=None): def make_row(target, handler): return [ join_nonempty(target.client.title(), target.version, delim='-') or '-', - join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-', + join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-', handler, ] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index b3a383c..abf54a9 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -68,7 +68,7 @@ def pad_block(block, padding_mode): raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') if padding_mode == 'iso7816' and padding_size: - block = block + [0x80] # NB: += mutates list + block = [*block, 0x80] # NB: += mutates list padding_size -= 1 return block + [PADDING_BYTE[padding_mode]] * padding_size @@ -110,9 +110,7 @@ def aes_ecb_decrypt(data, key, iv=None): for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_decrypt(block, expanded_key) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_ctr_decrypt(data, key, iv): @@ -148,9 +146,7 @@ def aes_ctr_encrypt(data, key, iv): cipher_counter_block = aes_encrypt(counter_block, expanded_key) encrypted_data += xor(block, cipher_counter_block) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_cbc_decrypt(data, key, iv): @@ -174,9 +170,7 @@ def aes_cbc_decrypt(data, key, iv): decrypted_block = aes_decrypt(block, expanded_key) decrypted_data += xor(decrypted_block, previous_cipher_block) previous_cipher_block = block - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data + return decrypted_data[:len(data)] def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): @@ -224,7 +218,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) if len(nonce) == 12: - j0 = nonce + [0, 0, 0, 1] + j0 = [*nonce, 0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) @@ -242,11 +236,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): data + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data - + ((len(data) * 8).to_bytes(8, 'big'))) # length of data + + ((len(data) * 8).to_bytes(8, 'big'))), # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): - raise ValueError("Mismatching authentication tag") + raise ValueError('Mismatching authentication tag') return decrypted_data @@ -288,9 +282,7 @@ def aes_decrypt(data, expanded_key): data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = shift_rows_inv(data) data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - - return data + return xor(data, expanded_key[:BLOCK_SIZE_BYTES]) def aes_decrypt_text(data, password, key_size_bytes): @@ -318,9 +310,7 @@ def aes_decrypt_text(data, password, key_size_bytes): cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) - plaintext = intlist_to_bytes(decrypted_data) - - return plaintext + return intlist_to_bytes(decrypted_data) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) @@ -428,9 +418,7 @@ def key_expansion(data): for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - data = data[:expanded_key_size_bytes] - - return data + return data[:expanded_key_size_bytes] def iter_vector(iv): @@ -511,7 +499,7 @@ def block_product(block_x, block_y): # NIST SP 800-38D, Algorithm 1 if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: - raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES) + raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes') block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) block_v = block_y[:] @@ -534,7 +522,7 @@ def ghash(subkey, data): # NIST SP 800-38D, Algorithm 2 if len(data) % BLOCK_SIZE_BYTES: - raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES) + raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes') last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 9dd4f2f..71dca82 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -81,10 +81,10 @@ class Cache: cachedir = self._get_root_dir() if not any((term in cachedir) for term in ('cache', 'tmp')): - raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir') self._ydl.to_screen( - 'Removing cache dir %s .' % cachedir, skip_eol=True) + f'Removing cache dir {cachedir} .', skip_eol=True) if os.path.exists(cachedir): self._ydl.to_screen('.', skip_eol=True) shutil.rmtree(cachedir) diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 7ea5d08..dfc792e 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -35,7 +35,7 @@ from .compat_utils import passthrough_module from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 -from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401 +from ..networking.exceptions import HTTPError as compat_HTTPError passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py index 36c9836..9668957 100644 --- a/yt_dlp/compat/functools.py +++ b/yt_dlp/compat/functools.py @@ -7,6 +7,6 @@ passthrough_module(__name__, 'functools') del passthrough_module try: - cache # >= 3.9 + _ = cache # >= 3.9 except NameError: cache = lru_cache(maxsize=None) diff --git a/yt_dlp/compat/imghdr.py b/yt_dlp/compat/imghdr.py index 5d64ab0..4ae173f 100644 --- a/yt_dlp/compat/imghdr.py +++ b/yt_dlp/compat/imghdr.py @@ -1,16 +1,22 @@ -tests = { - 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP', - 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n', - 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'), - 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'), -} - - def what(file=None, h=None): """Detect format of image (Currently supports jpeg, png, webp, gif only) - Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py + Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py + Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf """ if h is None: with open(file, 'rb') as f: h = f.read(12) - return next((type_ for type_, test in tests.items() if test(h)), None) + + if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8): + return 'webp' + + if h.startswith(b'\x89PNG'): + return 'png' + + if h.startswith(b'\xFF\xD8\xFF'): + return 'jpeg' + + if h.startswith(b'GIF'): + return 'gif' + + return None diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 815897d..070d2fc 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -2,7 +2,9 @@ import base64 import collections import contextlib import datetime as dt +import functools import glob +import hashlib import http.cookiejar import http.cookies import io @@ -17,14 +19,12 @@ import tempfile import time import urllib.request from enum import Enum, auto -from hashlib import pbkdf2_hmac from .aes import ( aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) -from .compat import functools # isort: split from .compat import compat_os_name from .dependencies import ( _SECRETSTORAGE_UNAVAILABLE_REASON, @@ -146,7 +146,7 @@ def _extract_firefox_cookies(profile, container, logger): identities = json.load(containers).get('identities', []) container_id = next((context.get('userContextId') for context in identities if container in ( context.get('name'), - try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) + try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()), )), None) if not isinstance(container_id, int): raise ValueError(f'could not find firefox container "{container}" in containers.json') @@ -263,7 +263,7 @@ def _get_chromium_based_browser_settings(browser_name): return { 'browser_dir': browser_dir, 'keyring_name': keyring_name, - 'supports_profiles': browser_name not in browsers_without_profiles + 'supports_profiles': browser_name not in browsers_without_profiles, } @@ -740,40 +740,38 @@ def _get_linux_desktop_environment(env, logger): xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) desktop_session = env.get('DESKTOP_SESSION', None) if xdg_current_desktop is not None: - xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() - - if xdg_current_desktop == 'Unity': - if desktop_session is not None and 'gnome-fallback' in desktop_session: + for part in map(str.strip, xdg_current_desktop.split(':')): + if part == 'Unity': + if desktop_session is not None and 'gnome-fallback' in desktop_session: + return _LinuxDesktopEnvironment.GNOME + else: + return _LinuxDesktopEnvironment.UNITY + elif part == 'Deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif part == 'GNOME': return _LinuxDesktopEnvironment.GNOME - else: - return _LinuxDesktopEnvironment.UNITY - elif xdg_current_desktop == 'Deepin': - return _LinuxDesktopEnvironment.DEEPIN - elif xdg_current_desktop == 'GNOME': - return _LinuxDesktopEnvironment.GNOME - elif xdg_current_desktop == 'X-Cinnamon': - return _LinuxDesktopEnvironment.CINNAMON - elif xdg_current_desktop == 'KDE': - kde_version = env.get('KDE_SESSION_VERSION', None) - if kde_version == '5': - return _LinuxDesktopEnvironment.KDE5 - elif kde_version == '6': - return _LinuxDesktopEnvironment.KDE6 - elif kde_version == '4': - return _LinuxDesktopEnvironment.KDE4 - else: - logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') - return _LinuxDesktopEnvironment.KDE4 - elif xdg_current_desktop == 'Pantheon': - return _LinuxDesktopEnvironment.PANTHEON - elif xdg_current_desktop == 'XFCE': - return _LinuxDesktopEnvironment.XFCE - elif xdg_current_desktop == 'UKUI': - return _LinuxDesktopEnvironment.UKUI - elif xdg_current_desktop == 'LXQt': - return _LinuxDesktopEnvironment.LXQT - else: - logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') + elif part == 'X-Cinnamon': + return _LinuxDesktopEnvironment.CINNAMON + elif part == 'KDE': + kde_version = env.get('KDE_SESSION_VERSION', None) + if kde_version == '5': + return _LinuxDesktopEnvironment.KDE5 + elif kde_version == '6': + return _LinuxDesktopEnvironment.KDE6 + elif kde_version == '4': + return _LinuxDesktopEnvironment.KDE4 + else: + logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') + return _LinuxDesktopEnvironment.KDE4 + elif part == 'Pantheon': + return _LinuxDesktopEnvironment.PANTHEON + elif part == 'XFCE': + return _LinuxDesktopEnvironment.XFCE + elif part == 'UKUI': + return _LinuxDesktopEnvironment.UKUI + elif part == 'LXQt': + return _LinuxDesktopEnvironment.LXQT + logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') elif desktop_session is not None: if desktop_session == 'deepin': @@ -826,7 +824,7 @@ def _choose_linux_keyring(logger): elif desktop_environment == _LinuxDesktopEnvironment.KDE6: linux_keyring = _LinuxKeyring.KWALLET6 elif desktop_environment in ( - _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER + _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER, ): linux_keyring = _LinuxKeyring.BASICTEXT else: @@ -861,7 +859,7 @@ def _get_kwallet_network_wallet(keyring, logger): 'dbus-send', '--session', '--print-reply=literal', f'--dest={service_name}', wallet_path, - 'org.kde.KWallet.networkWallet' + 'org.kde.KWallet.networkWallet', ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -891,7 +889,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger): 'kwallet-query', '--read-password', f'{browser_keyring_name} Safe Storage', '--folder', f'{browser_keyring_name} Keys', - network_wallet + network_wallet, ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -931,9 +929,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger): for item in col.get_all_items(): if item.get_label() == f'{browser_keyring_name} Safe Storage': return item.get_secret() - else: - logger.error('failed to read from keyring') - return b'' + logger.error('failed to read from keyring') + return b'' def _get_linux_keyring_password(browser_keyring_name, keyring, logger): @@ -1002,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger): def pbkdf2_sha1(password, salt, iterations, key_length): - return pbkdf2_hmac('sha1', password, salt, iterations, key_length) + return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length) def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): @@ -1053,7 +1050,7 @@ def _decrypt_windows_dpapi(ciphertext, logger): None, # pvReserved: must be NULL None, # pPromptStruct: information about prompts to display 0, # dwFlags - ctypes.byref(blob_out) # pDataOut + ctypes.byref(blob_out), # pDataOut ) if not ret: logger.warning('failed to decrypt with DPAPI', only_once=True) @@ -1129,24 +1126,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') _RESERVED = { - "expires", - "path", - "comment", - "domain", - "max-age", - "secure", - "httponly", - "version", - "samesite", + 'expires', + 'path', + 'comment', + 'domain', + 'max-age', + 'secure', + 'httponly', + 'version', + 'samesite', } - _FLAGS = {"secure", "httponly"} + _FLAGS = {'secure', 'httponly'} # Added 'bad' group to catch the remaining value - _COOKIE_PATTERN = re.compile(r""" + _COOKIE_PATTERN = re.compile(r''' \s* # Optional whitespace at start of cookie (?P # Start of group 'key' - [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter + [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter ) # End of group 'key' ( # Optional group: there may not be a value. \s*=\s* # Equal Sign @@ -1156,7 +1153,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | # or \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr | # or - [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string + [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string ) # End of group 'val' | # or (?P(?:\\;|[^;])*?) # 'bad' group fallback for invalid values @@ -1164,7 +1161,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): )? # End of optional value group \s* # Any number of spaces. (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII | re.VERBOSE) + ''', re.ASCII | re.VERBOSE) def load(self, data): # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 @@ -1260,14 +1257,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): # with no name, whereas http.cookiejar regards it as a # cookie with no value. name, value = '', name - f.write('%s\n' % '\t'.join(( + f.write('{}\n'.format('\t'.join(( cookie.domain, self._true_or_false(cookie.domain.startswith('.')), cookie.path, self._true_or_false(cookie.secure), str_or_none(cookie.expires, default=''), - name, value - ))) + name, value, + )))) def save(self, filename=None, ignore_discard=True, ignore_expires=True): """ @@ -1306,10 +1303,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): return line cookie_list = line.split('\t') if len(cookie_list) != self._ENTRY_LEN: - raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) + raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) if cookie.expires_at and not cookie.expires_at.isdigit(): - raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line cf = io.StringIO() diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 65a0d6f..2e3ea2f 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -404,7 +404,7 @@ class FileDownloader: def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" - self.to_screen('[download] Resuming download at byte %s' % resume_len) + self.to_screen(f'[download] Resuming download at byte {resume_len}') def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): """Report retry""" diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 8b0b94e..ae23729 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,4 +1,5 @@ import enum +import functools import json import os import re @@ -9,7 +10,6 @@ import time import uuid from .fragment import FragmentFD -from ..compat import functools from ..networking import Request from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( @@ -55,7 +55,7 @@ class ExternalFD(FragmentFD): # correct and expected termination thus all postprocessing # should take place retval = 0 - self.to_screen('[%s] Interrupted by user' % self.get_basename()) + self.to_screen(f'[{self.get_basename()}] Interrupted by user') finally: if self._cookies_tempfile: self.try_remove(self._cookies_tempfile) @@ -108,7 +108,7 @@ class ExternalFD(FragmentFD): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, - not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) @@ -172,7 +172,7 @@ class ExternalFD(FragmentFD): decrypt_fragment = self.decrypter(info_dict) dest, _ = self.sanitize_open(tmpfilename, 'wb') for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + fragment_filename = f'{tmpfilename}-Frag{frag_index}' try: src, _ = self.sanitize_open(fragment_filename, 'rb') except OSError as err: @@ -186,7 +186,7 @@ class ExternalFD(FragmentFD): if not self.params.get('keep_fragments', False): self.try_remove(encodeFilename(fragment_filename)) dest.close() - self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) + self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) return 0 def _call_process(self, cmd, info_dict): @@ -336,11 +336,11 @@ class Aria2cFD(ExternalFD): if 'fragments' in info_dict: cmd += ['--uri-selector=inorder'] - url_list_file = '%s.frag.urls' % tmpfilename + url_list_file = f'{tmpfilename}.frag.urls' url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) - url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) + fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}' + url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename))) stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode()) stream.close() @@ -357,7 +357,7 @@ class Aria2cFD(ExternalFD): 'id': sanitycheck, 'method': method, 'params': [f'token:{rpc_secret}', *params], - }).encode('utf-8') + }).encode() request = Request( f'http://localhost:{rpc_port}/jsonrpc', data=d, headers={ @@ -416,7 +416,7 @@ class Aria2cFD(ExternalFD): 'total_bytes_estimate': total, 'eta': (total - downloaded) / (speed or 1), 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, - 'elapsed': time.time() - started + 'elapsed': time.time() - started, }) self._hook_progress(status, info_dict) @@ -509,12 +509,12 @@ class FFmpegFD(ExternalFD): proxy = self.params.get('proxy') if proxy: if not re.match(r'^[\da-zA-Z]+://', proxy): - proxy = 'http://%s' % proxy + proxy = f'http://{proxy}' if proxy.startswith('socks'): self.report_warning( - '%s does not support SOCKS proxies. Downloading is likely to fail. ' - 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) + f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. ' + 'Consider adding --hls-prefer-native to your command.') # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) @@ -575,7 +575,7 @@ class FFmpegFD(ExternalFD): if end_time: args += ['-t', str(end_time - start_time)] - args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 28cbba0..22d0ebd 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -67,12 +67,12 @@ class FlvReader(io.BytesIO): self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() segment_run_count = self.read_unsigned_int() segments = [] - for i in range(segment_run_count): + for _ in range(segment_run_count): first_segment = self.read_unsigned_int() fragments_per_segment = self.read_unsigned_int() segments.append((first_segment, fragments_per_segment)) @@ -91,12 +91,12 @@ class FlvReader(io.BytesIO): quality_entry_count = self.read_unsigned_char() # QualitySegmentUrlModifiers - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() fragments_count = self.read_unsigned_int() fragments = [] - for i in range(fragments_count): + for _ in range(fragments_count): first = self.read_unsigned_int() first_ts = self.read_unsigned_long_long() duration = self.read_unsigned_int() @@ -135,11 +135,11 @@ class FlvReader(io.BytesIO): self.read_string() # MovieIdentifier server_count = self.read_unsigned_char() # ServerEntryTable - for i in range(server_count): + for _ in range(server_count): self.read_string() quality_count = self.read_unsigned_char() # QualityEntryTable - for i in range(quality_count): + for _ in range(quality_count): self.read_string() # DrmData self.read_string() @@ -148,14 +148,14 @@ class FlvReader(io.BytesIO): segments_count = self.read_unsigned_char() segments = [] - for i in range(segments_count): + for _ in range(segments_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'asrt' segment = FlvReader(box_data).read_asrt() segments.append(segment) fragments_run_count = self.read_unsigned_char() fragments = [] - for i in range(fragments_run_count): + for _ in range(fragments_run_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'afrt' fragments.append(FlvReader(box_data).read_afrt()) @@ -309,7 +309,7 @@ class F4mFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') - self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -326,8 +326,8 @@ class F4mFD(FragmentFD): formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] else: - rate, media = list(filter( - lambda f: int(f[0]) == requested_bitrate, formats))[0] + rate, media = next(filter( + lambda f: int(f[0]) == requested_bitrate, formats)) # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. man_base_url = get_base_url(doc) or man_url diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index b4f003d..0d00196 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -199,7 +199,7 @@ class FragmentFD(FileDownloader): '.ytdl file is corrupt' if is_corrupt else 'Inconsistent state of incomplete fragment download') self.report_warning( - '%s. Restarting from the beginning ...' % message) + f'{message}. Restarting from the beginning ...') ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] @@ -366,10 +366,10 @@ class FragmentFD(FileDownloader): return decrypt_fragment def download_and_append_fragments_multiple(self, *args, **kwargs): - ''' + """ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... all args must be either tuple or list - ''' + """ interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: @@ -424,7 +424,7 @@ class FragmentFD(FileDownloader): finally: tpe.shutdown(wait=True) if not interrupt_trigger[0] and not is_live: - raise KeyboardInterrupt() + raise KeyboardInterrupt # we expect the user wants to stop and DO WANT the preceding postprocessors to run; # so returning a intermediate result here instead of KeyboardInterrupt on live return result diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 4ac5d99..0a00d5d 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,7 +72,7 @@ class HlsFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] - self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -160,10 +160,12 @@ class HlsFD(FragmentFD): extra_state = ctx.setdefault('extra_state', {}) format_index = info_dict.get('format_index') - extra_query = None - extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') - if extra_param_to_segment_url: - extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + extra_segment_query = None + if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'): + extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url) + extra_key_query = None + if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'): + extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -190,8 +192,8 @@ class HlsFD(FragmentFD): if frag_index <= ctx['fragment_index']: continue frag_url = urljoin(man_url, line) - if extra_query: - frag_url = update_url_query(frag_url, extra_query) + if extra_segment_query: + frag_url = update_url_query(frag_url, extra_segment_query) fragments.append({ 'frag_index': frag_index, @@ -212,8 +214,8 @@ class HlsFD(FragmentFD): frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) frag_url = urljoin(man_url, map_info.get('URI')) - if extra_query: - frag_url = update_url_query(frag_url, extra_query) + if extra_segment_query: + frag_url = update_url_query(frag_url, extra_segment_query) if map_info.get('BYTERANGE'): splitted_byte_range = map_info.get('BYTERANGE').split('@') @@ -228,7 +230,7 @@ class HlsFD(FragmentFD): 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, - 'media_sequence': media_sequence + 'media_sequence': media_sequence, }) media_sequence += 1 @@ -244,8 +246,10 @@ class HlsFD(FragmentFD): decrypt_info['KEY'] = external_aes_key else: decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if extra_key_query or extra_segment_query: + # Fall back to extra_segment_query to key for backwards compat + decrypt_info['URI'] = update_url_query( + decrypt_info['URI'], extra_key_query or extra_segment_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None @@ -350,9 +354,8 @@ class HlsFD(FragmentFD): # XXX: this should probably be silent as well # or verify that all segments contain the same data self.report_warning(bug_reports_message( - 'Discarding a %s block found in the middle of the stream; ' - 'if the subtitles display incorrectly,' - % (type(block).__name__))) + f'Discarding a {type(block).__name__} block found in the middle of the stream; ' + 'if the subtitles display incorrectly,')) continue block.write_into(output) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 693828b..c016579 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -176,7 +176,7 @@ class HttpFD(FileDownloader): 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, }, info_dict) - raise SucceedDownload() + raise SucceedDownload else: # The length does not match, we start the download over self.report_unable_to_resume() @@ -194,7 +194,7 @@ class HttpFD(FileDownloader): def close_stream(): if ctx.stream is not None: - if not ctx.tmpfilename == '-': + if ctx.tmpfilename != '-': ctx.stream.close() ctx.stream = None @@ -268,20 +268,20 @@ class HttpFD(FileDownloader): ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) except OSError as err: - self.report_error('unable to open for writing: %s' % str(err)) + self.report_error(f'unable to open for writing: {err}') return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error('unable to set filesize xattr: %s' % str(err)) + self.report_error(f'unable to set filesize xattr: {err}') try: ctx.stream.write(data_block) except OSError as err: self.to_stderr('\n') - self.report_error('unable to write data: %s' % str(err)) + self.report_error(f'unable to write data: {err}') return False # Apply rate limit @@ -327,7 +327,7 @@ class HttpFD(FileDownloader): elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() - raise ThrottledDownload() + raise ThrottledDownload elif speed: ctx.throttle_start = None @@ -338,7 +338,7 @@ class HttpFD(FileDownloader): if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter - raise NextFragment() + raise NextFragment if ctx.tmpfilename != '-': ctx.stream.close() diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index dd688f5..62c3a3b 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -251,7 +251,7 @@ class IsmFD(FragmentFD): skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 - for i, segment in enumerate(segments): + for segment in segments: frag_index += 1 if frag_index <= ctx['fragment_index']: continue diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index d977dce..3d4f2d7 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION class MhtmlFD(FragmentFD): - _STYLESHEET = """\ + _STYLESHEET = '''\ html, body { margin: 0; padding: 0; @@ -45,7 +45,7 @@ body > figure > img { max-width: 100%; max-height: calc(100vh - 5em); } -""" +''' _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) @@ -57,24 +57,19 @@ body > figure > img { )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): - return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary) + return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid' def _gen_stub(self, *, fragments, frag_boundary, title): output = io.StringIO() - output.write(( + output.write( '' '' '' - '' '' - '' '{title}' - '' '' - '' - ).format( - version=escapeHTML(YT_DLP_VERSION), - styles=self._STYLESHEET, - title=escapeHTML(title) - )) + f'' + f'{escapeHTML(title)}' + f'' + '') t0 = 0 for i, frag in enumerate(fragments): @@ -87,15 +82,12 @@ body > figure > img { num=i + 1, t0=srt_subtitles_timecode(t0), t1=srt_subtitles_timecode(t1), - duration=formatSeconds(frag['duration'], msec=True) + duration=formatSeconds(frag['duration'], msec=True), )) except (KeyError, ValueError, TypeError): t1 = None - output.write(( - '
Slide #{num}
' - ).format(num=i + 1)) - output.write(''.format( - cid=self._gen_cid(i, frag, frag_boundary))) + output.write(f'
Slide #{i + 1}
') + output.write(f'') output.write('') t0 = t1 @@ -126,31 +118,24 @@ body > figure > img { stub = self._gen_stub( fragments=fragments, frag_boundary=frag_boundary, - title=title + title=title, ) ctx['dest_stream'].write(( 'MIME-Version: 1.0\r\n' 'From: \r\n' 'To: \r\n' - 'Subject: {title}\r\n' + f'Subject: {self._escape_mime(title)}\r\n' 'Content-type: multipart/related; ' - '' 'boundary="{boundary}"; ' - '' 'type="text/html"\r\n' - 'X.yt-dlp.Origin: {origin}\r\n' + f'boundary="{frag_boundary}"; ' + 'type="text/html"\r\n' + f'X.yt-dlp.Origin: {origin}\r\n' '\r\n' - '--{boundary}\r\n' + f'--{frag_boundary}\r\n' 'Content-Type: text/html; charset=utf-8\r\n' - 'Content-Length: {length}\r\n' + f'Content-Length: {len(stub)}\r\n' '\r\n' - '{stub}\r\n' - ).format( - origin=origin, - boundary=frag_boundary, - length=len(stub), - title=self._escape_mime(title), - stub=stub - ).encode()) + f'{stub}\r\n').encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index fef8bff..462c6e2 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -15,7 +15,7 @@ class NiconicoDmcFD(FileDownloader): def real_download(self, filename, info_dict): from ..extractor.niconico import NiconicoIE - self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading from DMC') ie = NiconicoIE(self.ydl) info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) @@ -34,7 +34,7 @@ class NiconicoDmcFD(FileDownloader): try: self.ydl.urlopen(request).read() except Exception: - self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Heartbeat failed') with heartbeat_lock: if not download_complete: @@ -85,14 +85,14 @@ class NiconicoLiveFD(FileDownloader): 'quality': live_quality, 'protocol': 'hls+fmp4', 'latency': live_latency, - 'chasePlay': False + 'chasePlay': False, }, 'room': { 'protocol': 'webSocket', - 'commentable': True + 'commentable': True, }, 'reconnect': True, - } + }, })) else: ws = ws_extractor @@ -118,7 +118,7 @@ class NiconicoLiveFD(FileDownloader): elif self.ydl.params.get('verbose', False): if len(recv) > 100: recv = recv[:100] + '...' - self.to_screen('[debug] Server said: %s' % recv) + self.to_screen(f'[debug] Server said: {recv}') def ws_main(): reconnect = False @@ -128,7 +128,7 @@ class NiconicoLiveFD(FileDownloader): if ret is True: return except BaseException as e: - self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) + self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) time.sleep(10) continue finally: diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 0e09525..d7ffb3b 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -180,9 +180,9 @@ class RtmpFD(FileDownloader): while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: prevsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) + self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') time.sleep(5.0) # This seems to be needed - args = basic_args + ['--resume'] + args = [*basic_args, '--resume'] if retval == RD_FAILED: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] @@ -197,7 +197,7 @@ class RtmpFD(FileDownloader): break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) + self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index c7a8637..961938d 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD): def real_download(self, filename, info_dict): video_id = info_dict['video_id'] - self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading live chat') if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': self.report_warning('Live chat download runs until the livestream ends. ' 'If you wish to download the video simultaneously, run a separate yt-dlp instance') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e9cd38a..7f6507d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -76,6 +76,7 @@ from .aenetworks import ( ) from .aeonco import AeonCoIE from .afreecatv import ( + AfreecaTVCatchStoryIE, AfreecaTVIE, AfreecaTVLiveIE, AfreecaTVUserIE, @@ -779,6 +780,7 @@ from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE +from .graspop import GraspopIE from .gronkh import ( GronkhFeedIE, GronkhIE, @@ -969,6 +971,10 @@ from .la7 import ( LA7PodcastEpisodeIE, LA7PodcastIE, ) +from .laracasts import ( + LaracastsIE, + LaracastsPlaylistIE, +) from .lastfm import ( LastFMIE, LastFMPlaylistIE, @@ -1113,12 +1119,15 @@ from .meipai import MeipaiIE from .melonvod import MelonVODIE from .metacritic import MetacriticIE from .mgtv import MGTVIE -from .microsoftembed import MicrosoftEmbedIE -from .microsoftstream import MicrosoftStreamIE -from .microsoftvirtualacademy import ( - MicrosoftVirtualAcademyCourseIE, - MicrosoftVirtualAcademyIE, +from .microsoftembed import ( + MicrosoftBuildIE, + MicrosoftEmbedIE, + MicrosoftLearnEpisodeIE, + MicrosoftLearnPlaylistIE, + MicrosoftLearnSessionIE, + MicrosoftMediusIE, ) +from .microsoftstream import MicrosoftStreamIE from .mildom import ( MildomClipIE, MildomIE, @@ -1603,6 +1612,7 @@ from .qqmusic import ( QQMusicPlaylistIE, QQMusicSingerIE, QQMusicToplistIE, + QQMusicVideoIE, ) from .r7 import ( R7IE, @@ -1755,7 +1765,10 @@ from .rtve import ( RTVETelevisionIE, ) from .rtvs import RTVSIE -from .rtvslo import RTVSLOIE +from .rtvslo import ( + RTVSLOIE, + RTVSLOShowIE, +) from .rudovideo import RudoVideoIE from .rule34video import Rule34VideoIE from .rumble import ( @@ -1925,6 +1938,10 @@ from .spreaker import ( ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE +from .sproutvideo import ( + SproutVideoIE, + VidsIoIE, +) from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 2c0d296..7518ba6 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -4,7 +4,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, dict_get, @@ -67,7 +66,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'WWI Centenary', 'description': 'md5:c2379ec0ca84072e86b446e536954546', - } + }, }, { 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', 'info_dict': { @@ -75,7 +74,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', - } + }, }, { 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', 'info_dict': { @@ -86,7 +85,7 @@ class ABCIE(InfoExtractor): 'upload_date': '20200813', 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', - } + }, }, { 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', 'info_dict': { @@ -95,7 +94,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', - } + }, }] def _real_extract(self, url): @@ -126,7 +125,7 @@ class ABCIE(InfoExtractor): if mobj is None: expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?(.+?)', webpage, 'expired', None) if expired: - raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True) raise ExtractorError('Unable to extract video urls') urls_info = self._parse_json( @@ -164,7 +163,7 @@ class ABCIE(InfoExtractor): 'height': height, 'tbr': bitrate, 'filesize': int_or_none(url_info.get('filesize')), - 'format_id': format_id + 'format_id': format_id, }) return { @@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor): stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id - path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( - int(time.time()), house_number) + path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet' sig = hmac.new( b'android.content.res.Resources', - path.encode('utf-8'), hashlib.sha256).hexdigest() + path.encode(), hashlib.sha256).hexdigest() token = self._download_webpage( - 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) + f'http://iview.abc.net.au{path}&sig={sig}', video_id) def tokenize_url(url, token): return update_url_query(url, { @@ -303,7 +301,7 @@ class ABCIViewIE(InfoExtractor): for sd in ('1080', '720', 'sd', 'sd-low'): sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) + stream, lambda x: x['streams']['hls'][sd], str) if not sd_url: continue formats = self._extract_m3u8_formats( @@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 'md5:93119346c24a7c322d446d8eece430ff', 'series': 'Upper Middle Bogan', 'season': 'Series 1', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 8, }, { @@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.', 'series': '7.30 Mark Humphries Satire', 'season': 'Episodes', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 15, }] @@ -398,7 +396,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', webpage, 'initial state') video_data = self._parse_json( - unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id) + unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id) video_data = video_data['route']['pageData']['_embedded'] highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py index a57295b..7215500 100644 --- a/yt_dlp/extractor/abcnews.py +++ b/yt_dlp/extractor/abcnews.py @@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE): display_id = mobj.group('display_id') video_id = mobj.group('id') info_dict = self._extract_feed_info( - 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) + f'http://abcnews.go.com/video/itemfeed?id={video_id}') info_dict.update({ 'id': video_id, 'display_id': display_id, diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 6dca19d..ea5882b 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( dict_get, int_or_none, @@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor): data = self._download_json( 'https://api.abcotvs.com/v2/content', display_id, query={ 'id': video_id, - 'key': 'otv.web.%s.story' % station, + 'key': f'otv.web.{station}.story', 'station': station, })['data'] video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data - video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) + video_id = str(dict_get(video, ('id', 'publishedKey'), video_id)) title = video.get('title') or video['linkText'] formats = [] diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index b8c79b9..293a6c4 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -66,8 +66,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler): query={'t': media_token}, data=json.dumps({ 'kv': 'a', - 'lt': ticket - }).encode('utf-8'), + 'lt': ticket, + }).encode(), headers={ 'Content-Type': 'application/json', }) @@ -77,7 +77,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler): h = hmac.new( binascii.unhexlify(self.HKEY), - (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), + (license_response['cid'] + self.ie._DEVICE_ID).encode(), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) @@ -103,11 +103,11 @@ class AbemaTVBaseIE(InfoExtractor): @classmethod def _generate_aks(cls, deviceid): - deviceid = deviceid.encode('utf-8') + deviceid = deviceid.encode() # add 1 hour and then drop minute and secs ts_1hour = int((time_seconds() // 3600 + 1) * 3600) time_struct = time.gmtime(ts_1hour) - ts_1hour_str = str(ts_1hour).encode('utf-8') + ts_1hour_str = str(ts_1hour).encode() tmp = None @@ -119,7 +119,7 @@ class AbemaTVBaseIE(InfoExtractor): def mix_tmp(count): nonlocal tmp - for i in range(count): + for _ in range(count): mix_once(tmp) def mix_twist(nonce): @@ -160,7 +160,7 @@ class AbemaTVBaseIE(InfoExtractor): data=json.dumps({ 'deviceId': self._DEVICE_ID, 'applicationKeySecret': aks, - }).encode('utf-8'), + }).encode(), headers={ 'Content-Type': 'application/json', }) @@ -180,7 +180,7 @@ class AbemaTVBaseIE(InfoExtractor): 'osLang': 'ja_JP', 'osTimezone': 'Asia/Tokyo', 'appId': 'tv.abema', - 'appVersion': '3.27.1' + 'appVersion': '3.27.1', }, headers={ 'Authorization': f'bearer {self._get_device_token()}', })['token'] @@ -202,8 +202,8 @@ class AbemaTVBaseIE(InfoExtractor): f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in', data=json.dumps({ method: username, - 'password': password - }).encode('utf-8'), headers={ + 'password': password, + }).encode(), headers={ 'Authorization': f'bearer {self._get_device_token()}', 'Origin': 'https://abema.tv', 'Referer': 'https://abema.tv/', @@ -344,7 +344,7 @@ class AbemaTVIE(AbemaTVBaseIE): description = self._html_search_regex( (r'(.+?)

(.+?)(.+?)(.+?)' % (tag, tag), xml_str, tag) + f'<{tag}>(.+?)', xml_str, tag) def is_expired(token, date_ele): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) @@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.url, post_url) + post_url = urllib.parse.urljoin(urlh.url, post_url) form_data = self._hidden_inputs(form_page) form_data.update(data) return self._download_webpage_handle( @@ -1414,13 +1414,13 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' redirect_url = self._search_regex( r'(?i)]+src="(%s)' % HistoryPlayerIE._VALID_URL, + rf']+src="({HistoryPlayerIE._VALID_URL})', webpage, 'player URL') return self.url_result(player_url, HistoryPlayerIE.ie_key()) diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py index 390eae3..22d0266 100644 --- a/yt_dlp/extractor/aeonco.py +++ b/yt_dlp/extractor/aeonco.py @@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Semiconductor', 'uploader_id': 'semiconductor', 'uploader_url': 'https://vimeo.com/semiconductor', - 'duration': 348 - } + 'duration': 348, + }, }, { 'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', 'md5': '03582d795382e49f2fd0b427b55de409', @@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Aeon Video', 'uploader_id': 'aeonvideo', 'uploader_url': 'https://vimeo.com/aeonvideo', - 'duration': 1344 - } + 'duration': 1344, + }, }, { 'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out', 'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b', diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 3e5738f..f51b5a6 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -55,7 +55,7 @@ class AfreecaTVBaseIE(InfoExtractor): if result != 1: error = _ERRORS.get(result, 'You have failed to log in.') raise ExtractorError( - 'Unable to login: %s said: %s' % (self.IE_NAME, error), + f'Unable to login: {self.IE_NAME} said: {error}', expected=True) @@ -72,7 +72,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): )\?.*?\bnTitleNo=| vod\.afreecatv\.com/(PLAYER/STATION|player)/ ) - (?P\d+) + (?P\d+)/?(?:$|[?#&]) ''' _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', @@ -189,7 +189,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): headers={'Referer': url}, data=urlencode_postdata({ 'nTitleNo': video_id, 'nApiLevel': 10, - }))['data'] + }), impersonate=True)['data'] error_code = traverse_obj(data, ('code', {int})) if error_code == -6221: @@ -227,7 +227,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): **traverse_obj(file_element, { 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('file_start', {unified_timestamp}), - }) + }), }) if traverse_obj(data, ('adult_status', {str})) == 'notLogin': @@ -253,6 +253,43 @@ class AfreecaTVIE(AfreecaTVBaseIE): return self.playlist_result(entries, video_id, multi_video=True, **common_info) +class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): + IE_NAME = 'afreecatv:catchstory' + IE_DESC = 'afreecatv.com catch story' + _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P\d+)/catchstory' + _TESTS = [{ + 'url': 'https://vod.afreecatv.com/player/103247/catchstory', + 'info_dict': { + 'id': '103247', + }, + 'playlist_count': 2, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + 'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url}, + query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True) + + return self.playlist_result(self._entries(data), video_id) + + @staticmethod + def _entries(data): + # 'files' is always a list with 1 element + yield from traverse_obj(data, ( + 'data', lambda _, v: v['story_type'] == 'catch', + 'catch_list', lambda _, v: v['files'][0]['file'], { + 'id': ('files', 0, 'file_info_key', {str}), + 'url': ('files', 0, 'file', {url_or_none}), + 'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}), + 'title': ('title', {str}), + 'uploader': ('writer_nick', {str}), + 'uploader_id': ('writer_id', {str}), + 'thumbnail': ('thumb', {url_or_none}), + 'timestamp': ('write_timestamp', {int_or_none}), + })) + + class AfreecaTVLiveIE(AfreecaTVBaseIE): IE_NAME = 'afreecatv:live' IE_DESC = 'afreecatv.com livestreams' diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py index abb2d3f..9835584 100644 --- a/yt_dlp/extractor/agora.py +++ b/yt_dlp/extractor/agora.py @@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor): for ext in ('aac', 'mp3'): url_data = self._download_json( f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', - media_id, 'Downloading podcast %s URL' % ext) + media_id, f'Downloading podcast {ext} URL') # prevents inserting the mp3 (default) multiple times if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: formats.append({ @@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor): } @staticmethod - def _create_url(id): - return f'https://audycje.tokfm.pl/audycja/{id}' + def _create_url(video_id): + return f'https://audycje.tokfm.pl/audycja/{video_id}' def _real_extract(self, url): audition_id = self._match_id(url) diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py index 6cc63cd..cee660d 100644 --- a/yt_dlp/extractor/airtv.py +++ b/yt_dlp/extractor/airtv.py @@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor): 'view_count': int, 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', 'timestamp': 1664792603, - } + }, }, { # with youtube_id 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', @@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor): 'channel': 'Newsflare', 'duration': 37, 'upload_date': '20180511', - } + }, }] def _get_formats_and_subtitle(self, json_data, video_id): diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py index 89a6450..5179b72 100644 --- a/yt_dlp/extractor/aitube.py +++ b/yt_dlp/extractor/aitube.py @@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor): 'timestamp': 1667370519, 'title': 'Ангел хранитель 1 серия', 'channel_follower_count': int, - } + }, }, { # embed url 'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 2e83f2e..e8f8618 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, try_get, @@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor): 'title': title, 'thumbnail': data.get('coverUrl'), 'uploader': try_get( - data, lambda x: x['followBar']['name'], compat_str), + data, lambda x: x['followBar']['name'], str), 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), 'formats': formats, } diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index 124bab0..9715b49 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor): 'timestamp': 1636219149, 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', 'upload_date': '20211106', - } + }, }, { 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', 'info_dict': { @@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor): BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P\d+)/(?P[a-zA-Z0-9]+)_(?P[^/]+)/index.html\?videoId=(?P\d+)' def _real_extract(self, url): - base, post_type, id = self._match_valid_url(url).groups() + base, post_type, display_id = self._match_valid_url(url).groups() wp = { 'balkans.aljazeera.net': 'ajb', 'chinese.aljazeera.net': 'chinese', @@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor): 'news': 'news', }[post_type.split('/')[0]] video = self._download_json( - f'https://{base}/graphql', id, query={ + f'https://{base}/graphql', display_id, query={ 'wp-site': wp, 'operationName': 'ArchipelagoSingleArticleQuery', 'variables': json.dumps({ - 'name': id, + 'name': display_id, 'postType': post_type, }), }, headers={ @@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor): embed = 'default' if video_id is None: - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, display_id) account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', group=(1, 2, 3, 4), default=(None, None, None, None)) @@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor): return { '_type': 'url_transparent', 'url': url, - 'ie_key': 'Generic' + 'ie_key': 'Generic', } return { '_type': 'url_transparent', 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', - 'ie_key': 'BrightcoveNew' + 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 2d342cf..e0859d4 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, qualities, @@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor): duration = int_or_none(video.get('duration')) view_count = int_or_none(video.get('view_count')) timestamp = unified_timestamp(try_get( - video, lambda x: x['added_at']['date'], compat_str)) + video, lambda x: x['added_at']['date'], str)) else: video_id = display_id media_data = self._download_json( - 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id) title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné')) for key, value in media_data['video'].items(): if not key.endswith('Path'): diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 49df4bf..5ea1c30 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -33,27 +33,27 @@ _QUERIES = { video: getClip(clipIdentifier: $id) { %s %s } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'montage': '''query ($id: String!) { video: getMontage(clipIdentifier: $id) { %s } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Clips': '''query ($page: Int!, $user: String!, $game: Int) { videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'Montages': '''query ($page: Int!, $user: String!) { videos: montages(search: createdDate, page: $page, user: $user) { data { %s } } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Mobile Clips': '''query ($page: Int!, $user: String!) { videos: clips(search: createdDate, page: $page, user: $user, mobile: true) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 } @@ -121,7 +121,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230425', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/clip?clip=8LJLY4JKB', 'info_dict': { @@ -139,7 +139,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230702', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c', 'info_dict': { @@ -155,7 +155,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230418', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=RILJMH6QOS', 'info_dict': { @@ -171,7 +171,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230703', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -191,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE): 'id': '62b8bdfc9021052f7905882d-clips', 'title': 'cherokee - Clips', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-clips-730', 'title': 'cherokee - Clips - 730', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-montages', 'title': 'cherokee - Montages', }, - 'playlist_mincount': 4 + 'playlist_mincount': 4, }, { 'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-mobile', 'title': 'cherokee - Mobile Clips', }, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }] _PAGE_SIZE = 10 diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index f927965..7b74d55 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor): 'tbr': 1145, 'categories': list, 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index ea3332e..c315e4f 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -12,7 +12,7 @@ from ..utils import ( class Alsace20TVBaseIE(InfoExtractor): def _extract_video(self, video_id, url=None): info = self._download_json( - 'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ), + f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html', video_id) or {} title = info.get('titre') @@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor): else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' - thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) + thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage)) upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) - upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None + upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py index 6878918..bfbf6b6 100644 --- a/yt_dlp/extractor/altcensored.py +++ b/yt_dlp/extractor/altcensored.py @@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor): 'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg', 'view_count': int, 'categories': ['News & Politics'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index cb2b989..ce03a42 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, clean_html, @@ -21,7 +21,7 @@ class AluraIE(InfoExtractor): 'info_dict': { 'id': '60095', 'ext': 'mp4', - 'title': 'Referências, ref-set e alter' + 'title': 'Referências, ref-set e alter', }, 'skip': 'Requires alura account credentials'}, { @@ -30,7 +30,7 @@ class AluraIE(InfoExtractor): 'only_matching': True}, { 'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219', - 'only_matching': True} + 'only_matching': True}, ] def _real_extract(self, url): @@ -62,7 +62,7 @@ class AluraIE(InfoExtractor): return { 'id': video_id, 'title': video_title, - "formats": formats + 'formats': formats, } def _perform_login(self, username, password): @@ -91,7 +91,7 @@ class AluraIE(InfoExtractor): 'post url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', @@ -103,7 +103,7 @@ class AluraIE(InfoExtractor): r'(?s)]+class="alert-message[^"]*">(.+?)

', response, 'error message', default=None) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') @@ -119,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE @classmethod def suitable(cls, url): - return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url) + return False if AluraIE.suitable(url) else super().suitable(url) def _real_extract(self, url): @@ -157,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE 'url': video_url, 'id_key': self.ie_key(), 'chapter': chapter, - 'chapter_number': chapter_number + 'chapter_number': chapter_number, } entries.append(entry) return self.playlist_result(entries, course_path, course_title) diff --git a/yt_dlp/extractor/amadeustv.py b/yt_dlp/extractor/amadeustv.py index 2f5ca91..f4ea04e 100644 --- a/yt_dlp/extractor/amadeustv.py +++ b/yt_dlp/extractor/amadeustv.py @@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor): 'display_id': '65091a87ff85af59d9fc54c3', 'view_count': int, 'description': 'md5:a0357b9c215489e2067cbae0b777bb95', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 509b21a..ed0f0cd 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor): 'uploader': 'PBS NewsHour', 'uploader_id': 'PBSNewsHour', 'timestamp': 1549639570, - } + }, }, { # Vimeo 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', @@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor): 'timestamp': 1294763658, 'upload_date': '20110111', 'uploader': 'Sam Morrill', - 'uploader_id': 'sammorrill' - } + 'uploader_id': 'sammorrill', + }, }, { # Direct Link 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', @@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor): 'subtitles': dict, 'upload_date': '20091007', 'timestamp': 1254942511, - } + }, }] def _real_extract(self, url): video_id = self._match_id(url) meta = self._download_json( - 'https://amara.org/api/videos/%s/' % video_id, + f'https://amara.org/api/videos/{video_id}/', video_id, query={'format': 'json'}) title = meta['title'] video_url = meta['all_urls'][0] diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index a03f983..d1b9166 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) + playlist_id = self._match_id(url) for retry in self.RetryManager(): - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, playlist_id) try: data_json = self._search_json( - r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, + r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id, transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor): 'height': int_or_none(video.get('videoHeight')), 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] - return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) + return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title')) class AmazonReviewsIE(InfoExtractor): diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 2c71c5e..0590a34 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor): asin, note=note, headers={ 'Content-Type': 'application/json', 'currentpageurl': '/', - 'currentplatform': 'dWeb' + 'currentplatform': 'dWeb', }, data=json.dumps(data).encode() if data else None, query=None if data else { 'deviceType': 'A1WMMUXPCUJL4N', diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index 10bd021..15a86e2 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE site, display_id = self._match_valid_url(url).groups() requestor_id = self._REQUESTOR_ID_MAP[site] page_data = self._download_json( - 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' - % (requestor_id.lower(), display_id), display_id)['data'] + f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}', + display_id)['data'] properties = page_data.get('properties') or {} query = { 'mbr': 'true', @@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE try: for v in page_data['children']: if v.get('type') == 'video-player': - releasePid = v['properties']['currentVideo']['meta']['releasePid'] - tp_path = 'M_UwQC/' + releasePid + release_pid = v['properties']['currentVideo']['meta']['releasePid'] + tp_path = 'M_UwQC/' + release_pid media_url = 'https://link.theplatform.com/s/' + tp_path video_player_count += 1 except KeyError: pass if video_player_count > 1: self.report_warning( - 'The JSON data has %d video players. Only one will be extracted' % video_player_count) + f'The JSON data has {video_player_count} video players. Only one will be extracted') # Fall back to videoPid if releasePid not found. # TODO: Fall back to videoPid if releasePid manifest uses DRM. @@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE }) ns_keys = theplatform_metadata.get('$xmlns', {}).keys() if ns_keys: - ns = list(ns_keys)[0] + ns = next(iter(ns_keys)) episode = theplatform_metadata.get(ns + '$episodeTitle') or None episode_number = int_or_none( theplatform_metadata.get(ns + '$episode')) diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index e889458..a6337e4 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor): resource_type = 'episodes' resource = self._download_json( - 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id) + f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id) video = resource['video'] if is_episode else resource episode = resource if is_episode else resource.get('episode') or {} return { '_type': 'url_transparent', - 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'], + 'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']), 'ie_key': 'Zype', 'description': clean_html(video.get('description')), 'timestamp': unified_timestamp(video.get('publishDate')), @@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): ] if season_number: - playlist_id = 'season_%d' % season_number - playlist_title = 'Season %d' % season_number + playlist_id = f'season_{season_number}' + playlist_title = f'Season {season_number}' facet_filters.append('search_season_list:' + playlist_title) else: playlist_id = show playlist_title = title season_search = self._download_json( - 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, + f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production', playlist_id, headers={ 'Origin': 'https://www.americastestkitchen.com', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-Application-Id': 'Y1FNZXUI30', }, query={ 'facetFilters': json.dumps(facet_filters), - 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, + 'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season', 'attributesToHighlight': '', 'hitsPerPage': 1000, }) @@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): 'description': episode.get('description'), 'timestamp': unified_timestamp(episode.get('search_document_date')), 'season_number': season_number, - 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)), + 'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')), 'ie_key': AmericasTestKitchenIE.ie_key(), } diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 6b2bf2d..adf4733 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'Unable to download Akamai AMP feed', transform_source=strip_jsonp) item = feed.get('channel', {}).get('item') if not item: - raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error'])) video_id = item['guid'] def get_media_node(name, default=None): - media_name = 'media-%s' % name + media_name = f'media-{name}' media_group = item.get('media-group') or item return media_group.get(media_name) or item.get(media_name) or item.get(name, default) diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py index 5e78f37..652154a 100644 --- a/yt_dlp/extractor/anchorfm.py +++ b/yt_dlp/extractor/anchorfm.py @@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'release_date': '20230121', 'release_timestamp': 1674285179, 'episode_id': 'e1tpt3d', - } + }, }, { # embed url 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', @@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'season': 'Season 2', 'season_number': 2, 'episode_id': 'e1shjqd', - } + }, }] _WEBPAGE_TESTS = [{ @@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', 'uploader': 'Podcast Tempo', 'channel': 'apakatatempo', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py index 9f5b9b5..6800fe3 100644 --- a/yt_dlp/extractor/angel.py +++ b/yt_dlp/extractor/angel.py @@ -15,8 +15,8 @@ class AngelIE(InfoExtractor): 'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons', 'description': 'md5:73b704897c20ab59c433a9c0a8202d5e', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 1359.0 - } + 'duration': 1359.0, + }, }, { 'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name', 'md5': 'e4774bad0a5f0ad2e90d175cafdb797d', @@ -26,8 +26,8 @@ class AngelIE(InfoExtractor): 'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name', 'description': 'md5:aadfb4827a94415de5ff6426e6dee3be', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 3276.0 - } + 'duration': 3276.0, + }, }] def _real_extract(self, url): @@ -44,7 +44,7 @@ class AngelIE(InfoExtractor): 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } # Angel uses cloudinary in the background and supports image transformations. diff --git a/yt_dlp/extractor/antenna.py b/yt_dlp/extractor/antenna.py index 2929d65..b1a0179 100644 --- a/yt_dlp/extractor/antenna.py +++ b/yt_dlp/extractor/antenna.py @@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE): info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) if not embed_urls: - raise ExtractorError('no videos found for %s' % video_id, expected=True) + raise ExtractorError(f'no videos found for {video_id}', expected=True) return self.playlist_from_matches( embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0df5033..bf3d60b 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor): 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', - 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' + 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', } def _generate_nfl_token(self, anvack, mcp_id): @@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor): token } } -}''' % (anvack, mcp_id), +}''' % (anvack, mcp_id), # noqa: UP031 }).encode(), headers={ 'Authorization': auth_token, 'Content-Type': 'application/json', @@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor): return self._download_json( video_data_url, video_id, transform_source=strip_jsonp, query=query, - data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8')) + data=json.dumps({'api': api}, separators=(',', ':')).encode()) def _get_anvato_videos(self, access_key, video_id, token): video_data = self._get_video_json(access_key, video_id, token) @@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor): for caption in video_data.get('captions', []): a_caption = { 'url': caption['url'], - 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None, } subtitles.setdefault(caption['language'], []).append(a_caption) subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 455f667..893dce7 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # video with vidible ID 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', @@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, @@ -83,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE return self._extract_yahoo_video(video_id, 'us') response = self._download_json( - 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details', video_id)['response'] if response['statusText'] != 'Ok': - raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True) video_data = response['data'] formats = [] diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index 1ea0b1d..fed5970 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -34,7 +34,7 @@ class APAIE(InfoExtractor): video_id, base_url = mobj.group('id', 'base_url') webpage = self._download_webpage( - '%s/player/%s' % (base_url, video_id), video_id) + f'{base_url}/player/{video_id}', video_id) jwplatform_id = self._search_regex( r'media[iI]d\s*:\s*["\'](?P[a-zA-Z0-9]{8})', webpage, @@ -47,7 +47,7 @@ class APAIE(InfoExtractor): def extract(field, name=None): return self._search_regex( - r'\b%s["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' % field, + rf'\b{field}["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, name or field, default=None, group='value') title = extract('title') or video_id diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index 49bbeab..bd301e9 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor): 'duration': 6454, 'series': 'The Tim Dillon Show', 'thumbnail': 're:.+[.](png|jpe?g|webp)', - } + }, }, { 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', 'only_matching': True, diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 21103ae..0a600f6 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,8 +1,8 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( int_or_none, parse_duration, @@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor): 'uploader_id': 'wb', }, }, - ] + ], }, { 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 'info_dict': { @@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor): webpage = self._download_webpage(url, movie) film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') film_data = self._download_json( - 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, + f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json', film_id, fatal=False) if film_data: @@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor): if not src: continue formats.append({ - 'format_id': '%s-%s' % (version, size), + 'format_id': f'{version}-{size}', 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), @@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor): page_data = film_data.get('page', {}) return self.playlist_result(entries, film_id, page_data.get('movie_title')) - playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc') def fix_html(s): s = re.sub(r'(?s).*?', '', s) @@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor): # like: http://trailers.apple.com/trailers/wb/gravity/ def _clean_json(m): - return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + return 'iTunes.playURL({});'.format(m.group(1).replace('\'', ''')) s = re.sub(self._JSON_RE, _clean_json, s) - s = '%s' % s - return s + return f'{s}' doc = self._download_xml(playlist_url, movie, transform_source=fix_html) playlist = [] @@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor): duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() - settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json') settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') formats = [] - for format in settings['metadata']['sizes']: + for fmt in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src']) formats.append({ 'url': format_url, - 'format': format['type'], - 'width': int_or_none(format['width']), - 'height': int_or_none(format['height']), + 'format': fmt['type'], + 'width': int_or_none(fmt['width']), + 'height': int_or_none(fmt['height']), }) playlist.append({ @@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor): 'title': 'Movie Studios', }, } - _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P%s)' % '|'.join(_SECTIONS) + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P{})'.format('|'.join(_SECTIONS)) _TESTS = [{ 'url': 'http://trailers.apple.com/#section=justadded', 'info_dict': { @@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor): def _real_extract(self, url): section = self._match_id(url) section_data = self._download_json( - 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], + 'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']), section) entries = [ self.url_result('http://trailers.apple.com' + e['location']) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 41f3a4f..f5a55ef 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import json import re import urllib.parse from .common import InfoExtractor from .youtube import YoutubeBaseInfoExtractor, YoutubeIE -from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..networking.exceptions import HTTPError from ..utils import ( @@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'Bells Of Rostov', 'ext': 'mp3', }, - 'skip': 'restricted' + 'skip': 'restricted', }, { 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3', 'md5': '1d0aabe03edca83ca58d9ed3b493a3c3', @@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor): 'description': 'md5:012b2d668ae753be36896f343d12a236', 'upload_date': '20190928', }, - 'skip': 'restricted' + 'skip': 'restricted', }, { # Original formats are private 'url': 'https://archive.org/details/irelandthemakingofarepublic', @@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', }, - } - ] + }, + ], }] @staticmethod @@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor): def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) - identifier, entry_id = (video_id.split('/', 1) + [None])[:2] + identifier, _, entry_id = video_id.partition('/') # Archive.org metadata API doesn't clearly demarcate playlist entries # or subtitle tracks, so we get them from the embeddable player. @@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor): if track['kind'] != 'subtitles': continue entries[p['orig']][track['label']] = { - 'url': 'https://archive.org/' + track['file'].lstrip('/') + 'url': 'https://archive.org/' + track['file'].lstrip('/'), } metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) @@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor): 'height': int_or_none(f.get('width')), 'filesize': int_or_none(f.get('size'))}) - extension = (f['name'].rsplit('.', 1) + [None])[1] + _, has_ext, extension = f['name'].rpartition('.') + if not has_ext: + extension = None # We don't want to skip private formats if the user has access to them, # however without access to an account with such privileges we can't implement/test this. @@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor): 'filesize': int_or_none(f.get('size')), 'protocol': 'https', 'source_preference': 0 if f.get('source') == 'original' else -1, - 'format_note': f.get('source') + 'format_note': f.get('source'), }) for entry in entries.values(): @@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/Zeurel', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', - } + }, }, { # Internal link 'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0', @@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/1veritasium', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', - } + }, }, { # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description @@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'machinima', 'uploader_url': 'https://www.youtube.com/user/machinima', 'thumbnail': r're:https?://.*\.(jpg|webp)', - 'uploader': 'machinima' - } + 'uploader': 'machinima', + }, }, { # FLV video. Video file URL does not provide itag information 'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw', @@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'jawed', - } + }, }, { 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', 'info_dict': { @@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/itsmadeon', 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # First capture is of dead video, second is the oldest from CDX response. 'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E', @@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'ETC News', - } + }, }, { # First capture of dead video, capture date in link links to dead capture. 'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E', @@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'ETC News', }, 'expected_warnings': [ - r'unable to download capture webpage \(it may not be archived\)' - ] + r'unable to download capture webpage \(it may not be archived\)', + ], }, { # Very old YouTube page, has - YouTube in title. 'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg', 'info_dict': { 'id': '-06-KB9XTzg', 'ext': 'flv', - 'title': 'New Coin Hack!! 100% Safe!!' - } + 'title': 'New Coin Hack!! 100% Safe!!', + }, }, { 'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8', 'info_dict': { @@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'DankPods', - } + }, }, { # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4', @@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'PewDiePie', 'uploader_url': 'https://www.youtube.com/user/PewDiePie', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~June 2010 Capture. swfconfig 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', @@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', 'upload_date': '20090520', - } + }, }, { # Jan 2011: watch-video-date/eow-date surrounded by whitespace 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', @@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 132, 'uploader_url': 'https://www.youtube.com/user/claybutlermusic', - } + }, }, { # ~May 2009 swfArgs. ytcfg is spread out over various vars 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', @@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 754, - } + }, }, { # ~June 2012. Upload date is in another lang so cannot extract. 'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', @@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'BlackNerdComedy', 'duration': 182, 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~July 2013 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', @@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', 'upload_date': '20060428', 'uploader': 'punkybird', - } + }, }, { # April 2020: Player response in player config 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', @@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', 'uploader_url': 'https://www.youtube.com/user/GameGrumps', - } + }, }, { # watch7-user-header with yt-user-info 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', @@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'upload_date': '20150503', 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', - } + }, }, { # April 2012 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', @@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor): 'duration': 200, 'upload_date': '20120407', 'uploader_id': 'thecomputernerd01', - } + }, }, { 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M', - 'only_matching': True + 'only_matching': True, }, { # Video not archived, only capture is unavailable video page 'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10', - 'only_matching': True + 'only_matching': True, }, { # Encoded url 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc:20050214000000', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc', - 'only_matching': True + 'only_matching': True, }, ] _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE @@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor): _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers _YT_ALL_THUMB_SERVERS = orderedSet( - _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]]) + [*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]]) _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/' _OLDEST_CAPTURE_DATE = 20050214000000 _NEWEST_CAPTURE_DATE = 20500101000000 - def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False): + def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False): # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md query = { 'url': url, @@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor): 'limit': 500, 'filter': ['statuscode:200'] + (filters or []), 'collapse': collapse or [], - **(query or {}) + **(query or {}), } res = self._download_json( 'https://web.archive.org/cdx/search/cdx', item_id, note or 'Downloading CDX API JSON', query=query, fatal=fatal) if isinstance(res, list) and len(res) >= 2: # format response to make it easier to use - return list(dict(zip(res[0], v)) for v in res[1:]) + return [dict(zip(res[0], v)) for v in res[1:]] elif not isinstance(res, list) or len(res) != 0: self.report_warning('Error while parsing CDX API response' + bug_reports_message()) @@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor): { 'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'), 'filesize': int_or_none(thumbnail_dict.get('length')), - 'preference': int_or_none(thumbnail_dict.get('length')) + 'preference': int_or_none(thumbnail_dict.get('length')), } for thumbnail_dict in response) if not try_all: break @@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor): for retry in retry_manager: try: urlh = self._request_webpage( - HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), + HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'), video_id, note='Fetching archived video file url', expected_status=True) except ExtractorError as e: # HTTP Error 404 is expected if the video is not saved. @@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor): info['thumbnails'] = self._extract_thumbnails(video_id) if urlh: - url = compat_urllib_parse_unquote(urlh.url) + url = urllib.parse.unquote(urlh.url) video_file_url_qs = parse_qs(url) # Attempt to recover any ext & format info from playback url & response headers - format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} + fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} itag = try_get(video_file_url_qs, lambda x: x['itag'][0]) if itag and itag in YoutubeIE._formats: - format.update(YoutubeIE._formats[itag]) - format.update({'format_id': itag}) + fmt.update(YoutubeIE._formats[itag]) + fmt.update({'format_id': itag}) else: mime = try_get(video_file_url_qs, lambda x: x['mime'][0]) ext = (mimetype2ext(mime) or urlhandle_detect_ext(urlh) or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type'))) - format.update({'ext': ext}) - info['formats'] = [format] + fmt.update({'ext': ext}) + info['formats'] = [fmt] if not info.get('duration'): info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0])) diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index febd3d2..8da9bc4 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( extract_attributes, int_or_none, + join_nonempty, parse_iso8601, try_get, ) @@ -11,7 +12,7 @@ from ..utils import ( class ArcPublishingIE(InfoExtractor): _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - _VALID_URL = r'arcpublishing:(?P[a-z]+):(?P%s)' % _UUID_REGEX + _VALID_URL = rf'arcpublishing:(?P[a-z]+):(?P{_UUID_REGEX})' _TESTS = [{ # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ 'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', @@ -74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor): def _extract_embed_urls(cls, url, webpage): entries = [] # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview - for powa_el in re.findall(r'(]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage): + for powa_el in re.findall(rf'(]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage): powa = extract_attributes(powa_el) or {} org = powa.get('data-org') uuid = powa.get('data-uuid') if org and uuid: - entries.append('arcpublishing:%s:%s' % (org, uuid)) + entries.append(f'arcpublishing:{org}:{uuid}') return entries def _real_extract(self, url): @@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor): elif stream_type in ('ts', 'hls'): m3u8_formats = self._extract_m3u8_formats( s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) - if all([f.get('acodec') == 'none' for f in m3u8_formats]): + if all(f.get('acodec') == 'none' for f in m3u8_formats): continue for f in m3u8_formats: height = f.get('height') @@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor): else: vbr = int_or_none(s.get('bitrate')) formats.append({ - 'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type, + 'format_id': join_nonempty(stream_type, vbr), 'vbr': vbr, 'width': int_or_none(s.get('width')), 'height': int_or_none(s.get('height')), diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 3db59c5..6fd6413 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor): formats.extend(self._extract_f4m_formats( update_url_query(stream_url, { 'hdcore': '3.1.1', - 'plugin': 'aasp-3.1.1.69.124' + 'plugin': 'aasp-3.1.1.69.124', }), video_id, f4m_id='hds', fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor): f = { 'url': server, 'play_path': stream_url, - 'format_id': 'a%s-rtmp-%s' % (num, quality), + 'format_id': f'a{num}-rtmp-{quality}', } else: f = { 'url': stream_url, - 'format_id': 'a%s-%s-%s' % (num, ext, quality) + 'format_id': f'a{num}-{ext}-{quality}', } m = re.search( r'_(?P\d+)x(?P\d+)\.mp4$', diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index de36ec8..aa6c5ca 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor): raise ExtractorError('Invalid URL', expected=True) media = self._download_json( - 'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id), + f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}', video_id, query={ # https://video.qbrick.com/docs/api/examples/library-api.html 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', @@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor): formats.extend(self._extract_f4m_formats( href, video_id, f4m_id='hds', fatal=False)) elif mime_type == 'application/dash+xml': - formats.extend(self._extract_f4m_formats( - href, video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_mpd_formats( + href, video_id, mpd_id='dash', fatal=False)) elif mime_type == 'application/vnd.ms-sstr+xml': formats.extend(self._extract_ism_formats( href, video_id, ism_id='mss', fatal=False)) diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 9a5524a..f196f61 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( float_or_none, format_field, @@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor): 'view_count': int, 'tags': ['linearna_algebra'], 'start_time': 10, - } + }, }, { 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', 'only_matching': True, @@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor): 'duration': float_or_none(video.get('duration'), 1000), 'view_count': int_or_none(video.get('views')), 'tags': video.get('hashtags'), - 'start_time': int_or_none(compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get('t', [None])[0]), + 'start_time': int_or_none(urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get('t', [None])[0]), } diff --git a/yt_dlp/extractor/art19.py b/yt_dlp/extractor/art19.py index 271c505..deec7ad 100644 --- a/yt_dlp/extractor/art19.py +++ b/yt_dlp/extractor/art19.py @@ -153,7 +153,7 @@ class Art19IE(InfoExtractor): 'series_id': ('series_id', {str}), 'timestamp': ('created_at', {parse_iso8601}), 'release_timestamp': ('released_at', {parse_iso8601}), - 'modified_timestamp': ('updated_at', {parse_iso8601}) + 'modified_timestamp': ('updated_at', {parse_iso8601}), })), **traverse_obj(rss_metadata, ('content', { 'title': ('episode_title', {str}), diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 46fe006..142d4b0 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -20,15 +20,15 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVIE(ArteTVBaseIE): - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) (?:https?:// (?: - (?:www\.)?arte\.tv/(?P%(langs)s)/videos| - api\.arte\.tv/api/player/v\d+/config/(?P%(langs)s) + (?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos| + api\.arte\.tv/api/player/v\d+/config/(?P{ArteTVBaseIE._ARTE_LANGUAGES}) ) |arte://program) - /(?P\d{6}-\d{3}-[AF]|LIVE) - ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} + /(?P\d{{6}}-\d{{3}}-[AF]|LIVE) + ''' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'only_matching': True, @@ -145,7 +145,7 @@ class ArteTVIE(ArteTVBaseIE): language_code = self._LANG_MAP.get(lang) config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={ - 'x-validated-age': '18' + 'x-validated-age': '18', }) geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {} @@ -247,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor): 'description': 'md5:be40b667f45189632b78c1425c7c2ce1', 'upload_date': '20201116', }, - 'skip': 'No video available' + 'skip': 'No video available', }, { 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'only_matching': True, @@ -262,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor): class ArteTVPlaylistIE(ArteTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P%s)/videos/(?PRC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?PRC-\d{{6}})' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'only_matching': True, @@ -298,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): class ArteTVCategoryIE(ArteTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P%s)/videos/(?P[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P[\w-]+(?:/[\w-]+)*)/?\s*$' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/politics-and-society/', 'info_dict': { @@ -312,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE): @classmethod def suitable(cls, url): return ( - not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) + not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)) and super().suitable(url)) def _real_extract(self, url): @@ -321,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE): items = [] for video in re.finditer( - r']*?href\s*=\s*(?P"|\'|\b)(?Phttps?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, + rf']*?href\s*=\s*(?P"|\'|\b)(?Phttps?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)', webpage): video = video.group('url') if video == url: continue - if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): + if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)): items.append(video) title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 3a44e52..0fe95be 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor): 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', 'duration': 3413, }, - 'skip': 'This video is only available for registered users' + 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', @@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor): ] _API_BASE = 'https://api.atresplayer.com/' - def _handle_error(self, e, code): - if isinstance(e.cause, HTTPError) and e.cause.status == code: - error = self._parse_json(e.cause.response.read(), None) - if error.get('error') == 'required_registered': - self.raise_login_required() - raise ExtractorError(error['error_description'], expected=True) - raise - def _perform_login(self, username, password): self._request_webpage( self._API_BASE + 'login', None, 'Downloading login page') @@ -49,13 +41,15 @@ class AtresPlayerIE(InfoExtractor): target_url = self._download_json( 'https://account.atresmedia.com/api/login', None, 'Logging in', headers={ - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }, data=urlencode_postdata({ 'username': username, 'password': password, }))['targetUrl'] except ExtractorError as e: - self._handle_error(e, 400) + if isinstance(e.cause, HTTPError) and e.cause.status == 400: + raise ExtractorError('Invalid username and/or password', expected=True) + raise self._request_webpage(target_url, None, 'Following Target URL') @@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor): episode = self._download_json( self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) except ExtractorError as e: - self._handle_error(e, 403) + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + error = self._parse_json(e.cause.response.read(), None) + if error.get('error') == 'required_registered': + self.raise_login_required() + raise ExtractorError(error['error_description'], expected=True) + raise title = episode['titulo'] diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py index 3f7b1e9..b219eee 100644 --- a/yt_dlp/extractor/atscaleconf.py +++ b/yt_dlp/extractor/atscaleconf.py @@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'data-scale-spring-2022', 'title': 'Data @Scale Spring 2022', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }, { 'url': 'https://atscaleconference.com/events/video-scale-2021/', @@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'video-scale-2021', 'title': 'Video @Scale 2021', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) return self.playlist_from_matches( re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), - ie='Generic', playlist_id=id, + ie='Generic', playlist_id=playlist_id, title=self._og_search_title(webpage), description=self._og_search_description(webpage)) diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 20ee34c..37bb616 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor): 'id': 'v-ce9cgn1e70n5-1', 'ext': 'mp4', 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', - } + }, }, { 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', 'only_matching': True, @@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor): video_id=video_id) video_title = json_data['views']['default']['page']['title'] - contentResource = json_data['views']['default']['page']['contentResource'] - content_id = contentResource[0]['id'] - content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']} - for id, content in enumerate(contentResource)] + content_resource = json_data['views']['default']['page']['contentResource'] + content_id = content_resource[0]['id'] + content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']} + for id_, content in enumerate(content_resource)] time_of_request = dt.datetime.now() not_before = time_of_request - dt.timedelta(minutes=5) @@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor): videos = self._download_json( 'https://vas-v4.p7s1video.net/4.0/getsources', content_id, 'Downloading videos JSON', query={ - 'token': jwt_token.decode('utf-8') + 'token': jwt_token.decode('utf-8'), }) - video_id, videos_data = list(videos['data'].items())[0] + video_id, videos_data = next(iter(videos['data'].items())) error_msg = try_get(videos_data, lambda x: x['error']['title']) if error_msg == 'Geo check failed': self.raise_geo_restricted(error_msg) elif error_msg: raise ExtractorError(error_msg) entries = [ - self._extract_video_info(url, contentResource[video['id']], video) + self._extract_video_info(url, content_resource[video['id']], video) for video in videos_data] return { diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py index 35114e5..c5a9c7e 100644 --- a/yt_dlp/extractor/audimedia.py +++ b/yt_dlp/extractor/audimedia.py @@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor): 'timestamp': 1448354940, 'duration': 74022, 'view_count': int, - } + }, }, { 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991', 'only_matching': True, @@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor): bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) if bitrate: f.update({ - 'format_id': 'http-%s' % bitrate, + 'format_id': f'http-{bitrate}', }) formats.append(f) diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py index a23fcd2..751b74a 100644 --- a/yt_dlp/extractor/audioboom.py +++ b/yt_dlp/extractor/audioboom.py @@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 4000.99, 'uploader': 'Sue Perkins: An hour or so with...', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', - } + }, }, { # Direct mp3-file link 'url': 'https://audioboom.com/posts/8128496.mp3', 'md5': 'e329edf304d450def95c7f86a9165ee1', @@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 1689.7, 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', - } + }, }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', 'only_matching': True, diff --git a/yt_dlp/extractor/audiodraft.py b/yt_dlp/extractor/audiodraft.py index 71e5afd..484ad4e 100644 --- a/yt_dlp/extractor/audiodraft.py +++ b/yt_dlp/extractor/audiodraft.py @@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor): headers={ 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8', 'X-Requested-With': 'XMLHttpRequest', - }, data=f'id={player_entry_id}'.encode('utf-8')) + }, data=f'id={player_entry_id}'.encode()) return { 'id': str(data_json['entry_id']), @@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) - player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_entry_id = self._search_regex( + r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id') return self._audiodraft_extract_from_id(player_entry_id) @@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - return self._audiodraft_extract_from_id(f'player_entry_{id}') + video_id = self._match_id(url) + return self._audiodraft_extract_from_id(f'player_entry_{video_id}') diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index 5c4160f..1d4460c 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -3,7 +3,6 @@ import time from .common import InfoExtractor from .soundcloud import SoundcloudIE -from ..compat import compat_str from ..utils import ( ExtractorError, url_basename, @@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor): 'id': '310086', 'ext': 'mp3', 'uploader': 'Roosh Williams', - 'title': 'Extraordinary' - } + 'title': 'Extraordinary', + }, }, # audiomack wrapper around soundcloud song # Needs new test URL. @@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor): # API is inconsistent with errors if 'url' not in api_response or not api_response['url'] or 'error' in api_response: - raise ExtractorError('Invalid url %s' % url) + raise ExtractorError(f'Invalid url {url}') # Audiomack wraps a lot of soundcloud tracks in their branded wrapper # if so, pass the work off to the soundcloud extractor @@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor): return self.url_result(api_response['url'], SoundcloudIE.ie_key()) return { - 'id': compat_str(api_response.get('id', album_url_tag)), + 'id': str(api_response.get('id', album_url_tag)), 'uploader': api_response.get('artist'), 'title': api_response.get('title'), 'url': api_response['url'], @@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor): 'info_dict': { 'id': '812251', - 'title': 'Tha Tour: Part 2 (Official Mixtape)' - } + 'title': 'Tha Tour: Part 2 (Official Mixtape)', + }, }, # Album playlist ripped from fakeshoredrive with no metadata { @@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor): 'id': '837576', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. G Herbo', - } + }, }, { 'info_dict': { 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', 'id': '837580', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. G Herbo', - } + }, }], - } + }, ] def _real_extract(self, url): @@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor): api_response = self._download_json( 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' % (album_url_tag, track_no, time.time()), album_url_tag, - note='Querying song information (%d)' % (track_no + 1)) + note=f'Querying song information ({track_no + 1})') # Total failure, only occurs when url is totally wrong # Won't happen in middle of valid playlist (next case) if 'url' not in api_response or 'error' in api_response: - raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) + raise ExtractorError(f'Invalid url for track {track_no} of album url {url}') # URL is good but song id doesn't exist - usually means end of playlist elif not api_response['url']: break @@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor): # Pull out the album metadata and add to result (if it exists) for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: if apikey in api_response and resultkey not in result: - result[resultkey] = compat_str(api_response[apikey]) + result[resultkey] = str(api_response[apikey]) song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ - 'id': compat_str(api_response.get('id', song_id)), + 'id': str(api_response.get('id', song_id)), 'uploader': api_response.get('artist'), 'title': api_response.get('title', song_id), 'url': api_response['url'], diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index 6448b44..c611c6e 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -1,7 +1,7 @@ import random +import urllib.parse from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ExtractorError, str_or_none, try_get @@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor): if response_data is not None: return response_data if len(response) == 1 and 'message' in response: - raise ExtractorError('API error: %s' % response['message'], + raise ExtractorError('API error: {}'.format(response['message']), expected=True) raise ExtractorError('Unexpected API response') def _select_api_base(self): """Selecting one of the currently available API hosts""" - response = super(AudiusBaseIE, self)._download_json( + response = super()._download_json( 'https://api.audius.co/', None, note='Requesting available API hosts', errnote='Unable to request available API hosts') @@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor): anything from this link, since the Audius API won't be able to resolve this url """ - url = compat_urllib_parse_unquote(url) - title = compat_urllib_parse_unquote(title) + url = urllib.parse.unquote(url) + title = urllib.parse.unquote(title) if '/' in title or '%2F' in title: fixed_title = title.replace('/', '%5C').replace('%2F', '%5C') return url.replace(title, fixed_title) @@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor): if self._API_BASE is None: self._select_api_base() try: - response = super(AudiusBaseIE, self)._download_json( - '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note, + response = super()._download_json( + f'{self._API_BASE}{self._API_V}{path}', item_id, note=note, errnote=errnote, expected_status=expected_status) except ExtractorError as exc: # some of Audius API hosts may not work as expected and return HTML - if 'Failed to parse JSON' in compat_str(exc): + if 'Failed to parse JSON' in str(exc): raise ExtractorError('An error occurred while receiving data. Try again', expected=True) raise exc return self._get_response_data(response) def _resolve_url(self, url, item_id): - return self._api_request('/resolve?url=%s' % url, item_id, + return self._api_request(f'/resolve?url={url}', item_id, expected_status=404) @@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, { # Regular track @@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, ] _ARTWORK_MAP = { - "150x150": 150, - "480x480": 480, - "1000x1000": 1000 + '150x150': 150, + '480x480': 480, + '1000x1000': 1000, } def _real_extract(self, url): @@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE): else: # API link title = None # uploader = None - track_data = self._api_request('/tracks/%s' % track_id, track_id) + track_data = self._api_request(f'/tracks/{track_id}', track_id) if not isinstance(track_data, dict): raise ExtractorError('Unexpected API response') @@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE): if isinstance(artworks_data, dict): for quality_key, thumbnail_url in artworks_data.items(): thumbnail = { - "url": thumbnail_url + 'url': thumbnail_url, } quality_code = self._ARTWORK_MAP.get(quality_key) if quality_code is not None: @@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE): return { 'id': track_id, 'title': track_data.get('title', title), - 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id), + 'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream', 'ext': 'mp3', 'description': track_data.get('description'), 'duration': track_data.get('duration'), 'track': track_data.get('title'), - 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str), + 'artist': try_get(track_data, lambda x: x['user']['name'], str), 'genre': track_data.get('genre'), 'thumbnails': thumbnails, 'view_count': track_data.get('play_count'), @@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE _TESTS = [ { 'url': 'audius:9RWlo', - 'only_matching': True + 'only_matching': True, }, { 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo', - 'only_matching': True + 'only_matching': True, }, ] @@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE): if not track_id: raise ExtractorError('Unable to get track ID from playlist') entries.append(self.url_result( - 'audius:%s' % track_id, + f'audius:{track_id}', ie=AudiusTrackIE.ie_key(), video_id=track_id)) return entries @@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE): raise ExtractorError('Unable to get playlist ID') playlist_tracks = self._api_request( - '/playlists/%s/tracks' % playlist_id, + f'/playlists/{playlist_id}/tracks', title, note='Downloading playlist tracks metadata', errnote='Unable to download playlist tracks metadata') if not isinstance(playlist_tracks, list): @@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I profile_audius_id = _profile_data[0]['id'] profile_bio = _profile_data[0].get('bio') - api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id) + api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id) return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index a8dfb3e..4066a5a 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -1,10 +1,7 @@ import base64 +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( format_field, int_or_none, @@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor): show_id, video_id, season_id = self._match_valid_url(url).groups() if video_id and int(video_id) > 0: return self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') + f'http://awaan.ae/media/{video_id}', 'AWAANVideo') elif season_id and int(season_id) > 0: return self.url_result(smuggle_url( - 'http://awaan.ae/program/season/%s' % season_id, + f'http://awaan.ae/program/season/{season_id}', {'show_id': show_id}), 'AWAANSeason') else: return self.url_result( - 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') + f'http://awaan.ae/program/{show_id}', 'AWAANSeason') class AWAANBaseIE(InfoExtractor): @@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE): video_id = self._match_id(url) video_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}', video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({ 'id': video_data['id'], 'user_id': video_data['user_id'], 'signature': video_data['signature'], @@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE): channel_id = self._match_id(url) channel_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}', channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'signature': channel_data['signature'], @@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor): show_id = smuggled_data.get('show_id') if show_id is None: season = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}', season_id, headers={'Origin': 'http://awaan.ae'}) show_id = season['id'] data['show_id'] = show_id @@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor): 'http://admin.mangomolo.com/analytics/index.php/plus/show', show_id, data=urlencode_postdata(data), headers={ 'Origin': 'http://awaan.ae', - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }) if not season_id: season_id = show['default_season'] @@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor): entries = [] for video in show['videos']: - video_id = compat_str(video['id']) + video_id = str(video['id']) entries.append(self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) + f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id)) return self.playlist_result(entries, season_id, title) diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index 4ebef92..177c410 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -1,9 +1,9 @@ import datetime as dt import hashlib import hmac +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'Accept': 'application/json', 'Host': self._AWS_PROXY_HOST, 'X-Amz-Date': amz_date, - 'X-Api-Key': self._AWS_API_KEY + 'X-Api-Key': self._AWS_API_KEY, } session_token = aws_dict.get('session_token') if session_token: headers['X-Amz-Security-Token'] = session_token def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() + return hashlib.sha256(s.encode()).hexdigest() # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_querystring = urllib.parse.urlencode(query) canonical_headers = '' for header_name, header_value in sorted(headers.items()): - canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + canonical_headers += f'{header_name.lower()}:{header_value}\n' signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) canonical_request = '\n'.join([ 'GET', @@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with canonical_querystring, canonical_headers, signed_headers, - aws_hash('') + aws_hash(''), ]) # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html @@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + return hmac.new(key, msg.encode(), hashlib.sha256) def aws_hmac_digest(key, msg): return aws_hmac(key, msg).digest() @@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with def aws_hmac_hexdigest(key, msg): return aws_hmac(key, msg).hexdigest() - k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') + k_signing = ('AWS4' + aws_dict['secret_key']).encode() for value in credential_scope_list: k_signing = aws_hmac_digest(k_signing, value) @@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html headers['Authorization'] = ', '.join([ - '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), - 'SignedHeaders=%s' % signed_headers, - 'Signature=%s' % signature, + '{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + f'SignedHeaders={signed_headers}', + f'Signature={signature}', ]) return self._download_json( - 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + 'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), video_id, headers=headers) diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index d1686ee..0e3a03f 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor): 'timestamp': 1538328802, 'view_count': int, 'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031', - 'duration': 1930 + 'duration': 1930, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', - 'only_matching': True + 'only_matching': True, }] _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be' _PARTNER_ID = '1719221' @@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor): })['data']['context']['mainAsset']['video']['kaltura']['kalturaId'] return self.url_result( - 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + f'kaltura:{self._PARTNER_ID}:{entry_id}', ie=KalturaIE.ie_key(), video_id=entry_id) diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py index 8786d67..a1ad424 100644 --- a/yt_dlp/extractor/baidu.py +++ b/yt_dlp/extractor/baidu.py @@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor): }] def _call_api(self, path, category, playlist_id, note): - return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( - path, category, playlist_id), playlist_id, note) + return self._download_json( + f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}', + playlist_id, note) def _real_extract(self, url): category, playlist_id = self._match_valid_url(url).groups() @@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor): 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') entries = [self.url_result( - episode['url'], video_title=episode['title'] + episode['url'], video_title=episode['title'], ) for episode in episodes_detail['videos']] return self.playlist_result( diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index c4e07a7..d10bdf8 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,10 +1,7 @@ import math +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( InAdvancePagedList, format_field, @@ -20,8 +17,8 @@ class BanByeBaseIE(InfoExtractor): @staticmethod def _extract_playlist_id(url, param='playlist'): - return compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get(param, [None])[0] + return urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get(param, [None])[0] def _extract_playlist(self, playlist_id): data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index e89b3a6..61cbab5 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -3,7 +3,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, @@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor): 'uploader_id': 'youtube-dl', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, - '_skip': 'There is a limit of 200 free downloads / month for the test song' + 'skip': 'There is a limit of 200 free downloads / month for the test song', }, { # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', @@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor): def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True): return self._parse_json(self._html_search_regex( - r'data-%s=(["\'])({.+?})\1' % attr, webpage, + rf'data-{attr}=(["\'])({{.+?}})\1', webpage, attr + ' data', group=2), video_id, fatal=fatal) def _real_extract(self, url): @@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor): download_link = tralbum.get('freeDownloadPage') if download_link: - track_id = compat_str(tralbum['id']) + track_id = str(tralbum['id']) download_webpage = self._download_webpage( download_link, track_id, 'Downloading free downloads page') @@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor): if isinstance(download_formats_list, list): for f in blob['download_formats']: name, ext = f.get('name'), f.get('file_extension') - if all(isinstance(x, compat_str) for x in (name, ext)): + if all(isinstance(x, str) for x in (name, ext)): download_formats[name] = ext.strip('.') for format_id, f in downloads.items(): @@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor): }) format_id = f.get('encoding_name') or format_id stat = self._download_json( - stat_url, track_id, 'Downloading %s JSON' % format_id, + stat_url, track_id, f'Downloading {format_id} JSON', transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], fatal=False) if not stat: @@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor): 'acodec': format_id.split('-')[0], }) - title = '%s - %s' % (artist, track) if artist else track + title = f'{artist} - {track}' if artist else track if not duration: duration = float_or_none(self._html_search_meta( @@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 1311756226, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, { 'md5': '1a2c32e2691474643e912cc6cd4bffaa', @@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 1311757238, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, ], 'info_dict': { @@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'blazo', }, 'params': { - 'playlistend': 2 + 'playlistend': 2, }, - 'skip': 'Bandcamp imposes download limits.' + 'skip': 'Bandcamp imposes download limits.', }, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { @@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): return (False if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) - else super(BandcampAlbumIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): uploader_id, album_id = self._match_valid_url(url).groups() @@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE }, }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE title = show.get('audio_title') or 'Bandcamp Weekly' subtitle = show.get('subtitle') if subtitle: - title += ' - %s' % subtitle + title += f' - {subtitle}' return { 'id': show_id, @@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), 'episode_id': show_id, - 'formats': formats + 'formats': formats, } @@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor): 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'id': 'dotscale', - 'title': 'Discography of dotscale' + 'title': 'Discography of dotscale', }, 'playlist_count': 1, }, { diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 82dc9ab..46f2978 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor): 'description': 'md5:560d96f02abbebe6c6b78b47465f6b28', 'upload_date': '20200324', 'timestamp': 1585087895, - } + }, }] _GRAPHQL_GETMETADATA_QUERY = ''' @@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) { 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY, } - def _call_api(self, video_id, id, operation, note): + def _call_api(self, video_id, id_var, operation, note): return self._download_json( 'https://api.infowarsmedia.com/graphql', video_id, note=note, headers={ - 'Content-Type': 'application/json; charset=utf-8' + 'Content-Type': 'application/json; charset=utf-8', }, data=json.dumps({ - 'variables': {'id': id}, + 'variables': {'id': id_var}, 'operationName': operation, - 'query': self._GRAPHQL_QUERIES[operation] + 'query': self._GRAPHQL_QUERIES[operation], }).encode('utf8')).get('data') def _get_comments(self, video_id, comments, comment_data): @@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) { 'tags': [tag.get('name') for tag in video_info.get('tags')], 'availability': self._availability(is_unlisted=video_info.get('unlisted')), 'comments': comments, - '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')) + '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')), } diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index f6b58b3..3af923f 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -2,10 +2,10 @@ import functools import itertools import json import re +import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ (?: @@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor): radio/player/| events/[^/]+/play/[^/]+/ ) - (?P%s)(?!/(?:episodes|broadcasts|clips)) - ''' % _ID_REGEX + (?P{_ID_REGEX})(?!/(?:episodes|broadcasts|clips)) + ''' _EMBED_REGEX = [r'setPlaylist\("(?Phttps?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)'] _LOGIN_URL = 'https://account.bbc.com/signin' @@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', @@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', 'note': 'Video', @@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', 'info_dict': { @@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor): error = clean_html(get_element_by_class('form-message', response)) if error: raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') class MediaSelectionError(Exception): - def __init__(self, id): - self.id = id + def __init__(self, error_id): + self.id = error_id def _extract_asx_playlist(self, connection, programme_id): asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') return [ref.get('href') for ref in asx.findall('./Entry/ref')] def _extract_items(self, playlist): - return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) + return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item') def _extract_medias(self, media_selection): error = media_selection.get('result') @@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor): def _raise_extractor_error(self, media_selection_error): raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, media_selection_error.id), + f'{self.IE_NAME} returned error: {media_selection_error.id}', expected=True) def _download_media_selector(self, programme_id): @@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor): for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): formats.append({ 'url': ref, - 'format_id': 'ref%s_%s' % (i, format_id), + 'format_id': f'ref{i}_{format_id}', }) elif transfer_format == 'dash': formats.extend(self._extract_mpd_formats( @@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor): href, programme_id, f4m_id=format_id, fatal=False)) else: if not supplier and bitrate: - format_id += '-%d' % bitrate + format_id += f'-{bitrate}' fmt = { 'format_id': format_id, 'filesize': file_size, @@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor): identifier = connection.get('identifier') server = connection.get('server') fmt.update({ - 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'url': f'{protocol}://{server}/{application}?{auth_string}', 'play_path': identifier, - 'app': '%s?%s' % (application, auth_string), + 'app': f'{application}?{auth_string}', 'page_url': 'http://www.bbc.co.uk', 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', 'rtmp_live': False, @@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor): def _download_playlist(self, playlist_id): try: playlist = self._download_json( - 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, + f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json', playlist_id, 'Downloading playlist JSON') formats = [] subtitles = {} @@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor): def _process_legacy_playlist(self, playlist_id): return self._process_legacy_playlist_url( - 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id) + f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id) def _download_legacy_playlist_url(self, url, playlist_id=None): return self._download_xml( url, playlist_id, 'Downloading legacy playlist XML') def _extract_from_legacy_playlist(self, playlist, playlist_id): - no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS) + no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems') if no_items is not None: reason = no_items.get('reason') if reason == 'preAvailability': - msg = 'Episode %s is not yet available' % playlist_id + msg = f'Episode {playlist_id} is not yet available' elif reason == 'postAvailability': - msg = 'Episode %s is no longer available' % playlist_id + msg = f'Episode {playlist_id} is no longer available' elif reason == 'noMedia': - msg = 'Episode %s is not currently available' % playlist_id + msg = f'Episode {playlist_id} is not currently available' else: - msg = 'Episode %s is not available: %s' % (playlist_id, reason) + msg = f'Episode {playlist_id} is not available: {reason}' raise ExtractorError(msg, expected=True) for item in self._extract_items(playlist): kind = item.get('kind') if kind not in ('programme', 'radioProgramme'): continue - title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text - description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) + title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text + description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary') description = description_el.text if description_el is not None else None def get_programme_id(item): @@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor): if value and re.match(r'^[pb][\da-z]{7}$', value): return value get_from_attributes(item) - mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS) + mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator') if mediator is not None: return get_from_attributes(mediator) @@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor): if not programme_id: programme_id = self._search_regex( - r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None) + rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None) if programme_id: formats, subtitles = self._download_media_selector(programme_id) @@ -641,7 +641,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE }, 'params': { 'skip_download': True, - } + }, }, { # article with single video embedded with data-playable containing XML playlist # with direct video links as progressiveDownloadUrl (for now these are extracted) @@ -884,7 +884,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'bbc_world_service', 'series': 'CrowdScience', 'chapters': [], - } + }, }, { # onion routes 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', 'only_matching': True, @@ -897,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE) return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) - else super(BBCIE, cls).suitable(url)) + else super().suitable(url)) def _extract_from_media_meta(self, media_meta, video_id): # Direct links to media in media metadata (e.g. @@ -1009,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if playlist: entry = None for key in ('streaming', 'progressiveDownload'): - playlist_url = playlist.get('%sUrl' % key) + playlist_url = playlist.get(f'{key}Url') if not playlist_url: continue try: @@ -1035,7 +1035,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227 group_id = self._search_regex( - r']+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, + rf']+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})', webpage, 'group id', default=None) if group_id: return self.url_result( @@ -1043,9 +1043,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, - r']+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, - r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], + [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"', + rf']+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"', + rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'], webpage, 'vpid', default=None) if programme_id: @@ -1142,7 +1142,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id, url_transparent=True) entry.update({ 'timestamp': traverse_obj(morph_payload, ( - 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}) + 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}), ), **traverse_obj(video_data, { 'thumbnail': (('iChefImage', 'image'), {url_or_none}, any), @@ -1189,7 +1189,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}), 'start_time': ('offset', 'start', {float_or_none}), 'end_time': ('offset', 'end', {float_or_none}), - }) + }), ), } @@ -1287,7 +1287,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'duration': ('versions', 0, 'duration', {int}), 'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), - }) + }), } def is_type(*types): @@ -1331,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if blocks: summary = [] for block in blocks: - text = try_get(block, lambda x: x['model']['text'], compat_str) + text = try_get(block, lambda x: x['model']['text'], str) if text: summary.append(text) if summary: @@ -1411,9 +1411,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE entries, playlist_id, playlist_title, playlist_description) def extract_all(pattern): - return list(filter(None, map( - lambda s: self._parse_json(s, playlist_id, fatal=False), - re.findall(pattern, webpage)))) + return list(filter(None, ( + self._parse_json(s, playlist_id, fatal=False) + for s in re.findall(pattern, webpage)))) # US accessed article with single embedded video (e.g. # https://www.bbc.com/news/uk-68546268) @@ -1435,14 +1435,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # Multiple video article (e.g. # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) - EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX + EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?' entries = [] for match in extract_all(r'new\s+SMP\(({.+?})\)'): embed_url = match.get('playerSettings', {}).get('externalEmbedUrl') if embed_url and re.match(EMBED_URL, embed_url): entries.append(embed_url) entries.extend(re.findall( - r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) + rf'setPlaylist\("({EMBED_URL})"\)', webpage)) if entries: return self.playlist_result( [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], @@ -1492,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id = media_meta.get('externalId') if not video_id: - video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num) + video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}' title = media_meta.get('caption') if not title: - title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num) + title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}' duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration')) @@ -1557,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor): class BBCCoUkPlaylistBaseIE(InfoExtractor): def _entries(self, webpage, url, playlist_id): - single_page = 'page' in compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query) + single_page = 'page' in urllib.parse.parse_qs( + urllib.parse.urlparse(url).query) for page_num in itertools.count(2): for video_id in re.findall( self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): @@ -1572,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): if not next_page: break webpage = self._download_webpage( - compat_urlparse.urljoin(url, next_page), playlist_id, - 'Downloading page %d' % page_num, page_num) + urllib.parse.urljoin(url, next_page), playlist_id, + f'Downloading page {page_num}', page_num) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -1588,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P{BBCCoUkIE._ID_REGEX})' @staticmethod def _get_default(episode, key, default_key='default'): @@ -1712,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE): variables['sliceId'] = series_id return self._download_json( 'https://graph.ibl.api.bbc.co.uk/', pid, headers={ - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', }, data=json.dumps({ 'id': '5692d93d5aac8d796a0305e895e61551', 'variables': variables, - }).encode('utf-8'))['data']['programme'] + }).encode())['data']['programme'] @staticmethod def _get_playlist_data(data): @@ -1776,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): def _call_api(self, pid, per_page, page=1, series_id=None): return self._download_json( - 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid, + f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes', pid, query={ 'page': page, 'per_page': per_page, @@ -1792,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): IE_NAME = 'bbc.co.uk:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX + _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)' _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' _TESTS = [{ diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index 0aecbd0..acc8d12 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import int_or_none @@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor): 'display_id': 'birds-original-mix', 'ext': 'mp4', 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", - } + }, }] def _real_extract(self, url): @@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor): track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) - title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name'] if track['mix']: title += ' (' + track['mix'] + ')' @@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor): images.append(image) return { - 'id': compat_str(track.get('id')) or track_id, + 'id': str(track.get('id')) or track_id, 'display_id': track.get('slug') or display_id, 'title': title, 'formats': formats, diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index da98ac3..960cdfa 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -23,7 +23,7 @@ class BeegIE(InfoExtractor): 'upload_date': '20220131', 'timestamp': 1643656455, 'display_id': '2540839', - } + }, }, { 'url': 'https://beeg.com/-0599050563103750?t=4-861', 'md5': 'bd8b5ea75134f7f07fad63008db2060e', @@ -38,7 +38,7 @@ class BeegIE(InfoExtractor): 'timestamp': 1643623200, 'display_id': '2569965', 'upload_date': '20220131', - } + }, }, { # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', @@ -55,8 +55,8 @@ class BeegIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video = self._download_json( - 'https://store.externulls.com/facts/file/%s' % video_id, - video_id, 'Downloading JSON for %s' % video_id) + f'https://store.externulls.com/facts/file/{video_id}', + video_id, f'Downloading JSON for {video_id}') fc_facts = video.get('fc_facts') first_fact = {} diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 9d2324f..45f45d0 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor): 'upload_date': '20141205', 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 677680b..ac45dd4 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor): return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), + 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', 'ie_key': 'NineCNineMedia', } diff --git a/yt_dlp/extractor/berufetv.py b/yt_dlp/extractor/berufetv.py index 8160cbd..5bba33a 100644 --- a/yt_dlp/extractor/berufetv.py +++ b/yt_dlp/extractor/berufetv.py @@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor): 'tags': ['Studienfilm'], 'duration': 602.440, 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$', - } + }, }] def _real_extract(self, url): @@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor): subtitles.setdefault(track['language'], []).append({ 'url': track['source'], 'name': track.get('label'), - 'ext': 'vtt' + 'ext': 'vtt', }) return { diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index cbf3dd0..3a8e743 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download @@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download 'skip_download': True, }, - } + }, ] - _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player' def _get_feed_query(self, uri): return { diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index c4621ca..87f0117 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE): 'timestamp': 1673341692, 'duration': 109.269, 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'], - 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg' - } + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 02d1ba0..9c55bb9 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,10 +1,8 @@ +import base64 import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) class BigflixIE(InfoExtractor): @@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { # multiple formats 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', @@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor): webpage, 'title') def decode_url(quoted_b64_url): - return compat_b64decode(compat_urllib_parse_unquote( + return base64.b64decode(urllib.parse.unquote( quoted_b64_url)).decode('utf-8') formats = [] @@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor): video_url = decode_url(encoded_url) f = { 'url': video_url, - 'format_id': '%sp' % height, + 'format_id': f'{height}p', 'height': int(height), } if video_url.startswith('rtmp'): @@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor): 'id': video_id, 'title': title, 'description': description, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index acf78e4..b1c230f 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -36,7 +36,7 @@ class BigoIE(InfoExtractor): raise ExtractorError('Received invalid JSON data') if info_raw.get('code'): raise ExtractorError( - 'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True) + 'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True) info = info_raw.get('data') or {} if not info.get('alive'): diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index eb28932..2ba6370 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -20,7 +20,7 @@ class BildIE(InfoExtractor): 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 196, - } + }, }, { 'note': 'static MP4 and HLS', 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html', @@ -32,7 +32,7 @@ class BildIE(InfoExtractor): 'description': 'md5:709b543c24dc31bbbffee73bccda34ad', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 69, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b38c90b..a84b7a6 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -31,12 +31,12 @@ from ..utils import ( mimetype2ext, parse_count, parse_qs, + parse_resolution, qualities, smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, - try_call, unified_timestamp, unsmuggle_url, url_or_none, @@ -47,6 +47,23 @@ from ..utils import ( class BilibiliBaseIE(InfoExtractor): _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?') + _WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session + _wbi_key_cache = {} + + @property + def is_logged_in(self): + return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA')) + + def _check_missing_formats(self, play_info, formats): + parsed_qualities = set(traverse_obj(formats, (..., 'quality'))) + missing_formats = join_nonempty(*[ + traverse_obj(fmt, 'new_description', 'display_desc', 'quality') + for fmt in traverse_obj(play_info, ( + 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ') + if missing_formats: + self.to_screen( + f'Format(s) {missing_formats} are missing; you have to login or ' + f'become a premium member to download them. {self._login_hint()}') def extract_formats(self, play_info): format_names = { @@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor): 'format': format_names.get(video.get('id')), } for video in traverse_obj(play_info, ('dash', 'video', ...))) - missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality'))) - if missing_formats: - self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; ' - f'you have to login or become premium member to download them. {self._login_hint()}') + if formats: + self._check_missing_formats(play_info, formats) + fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), { + 'url': ('url', {url_or_none}), + 'duration': ('length', {functools.partial(float_or_none, scale=1000)}), + 'filesize': ('size', {int_or_none}), + })) + if fragments: + formats.append({ + 'url': fragments[0]['url'], + 'filesize': sum(traverse_obj(fragments, (..., 'filesize'))), + **({ + 'fragments': fragments, + 'protocol': 'http_dash_segments', + } if len(fragments) > 1 else {}), + **traverse_obj(play_info, { + 'quality': ('quality', {int_or_none}), + 'format_id': ('quality', {str_or_none}), + 'format_note': ('quality', {lambda x: format_names.get(x)}), + 'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}), + }), + **parse_resolution(format_names.get(play_info.get('quality'))), + }) return formats - def _download_playinfo(self, video_id, cid, headers=None): + def _get_wbi_key(self, video_id): + if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT: + return self._wbi_key_cache['key'] + + session_data = self._download_json( + 'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign') + + lookup = ''.join(traverse_obj(session_data, ( + 'data', 'wbi_img', ('img_url', 'sub_url'), + {lambda x: x.rpartition('/')[2].partition('.')[0]}))) + + # from getMixinKey() in the vendor js + mixin_key_enc_tab = [ + 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, + 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, + 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, + 36, 20, 34, 44, 52, + ] + + self._wbi_key_cache.update({ + 'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32], + 'ts': time.time(), + }) + return self._wbi_key_cache['key'] + + def _sign_wbi(self, params, video_id): + params['wts'] = round(time.time()) + params = { + k: ''.join(filter(lambda char: char not in "!'()*", str(v))) + for k, v in sorted(params.items()) + } + query = urllib.parse.urlencode(params) + params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() + return params + + def _download_playinfo(self, bvid, cid, headers=None, qn=None): + params = {'bvid': bvid, 'cid': cid, 'fnval': 4048} + if qn: + params['qn'] = qn return self._download_json( - 'https://api.bilibili.com/x/player/playurl', video_id, - query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, - note=f'Downloading video formats for cid {cid}', headers=headers)['data'] + 'https://api.bilibili.com/x/player/wbi/playurl', bvid, + query=self._sign_wbi(params, bvid), headers=headers, + note=f'Downloading video formats for cid {cid} {qn or ""}')['data'] def json2srt(self, json_data): srt_data = '' @@ -112,21 +186,21 @@ class BilibiliBaseIE(InfoExtractor): 'danmaku': [{ 'ext': 'xml', 'url': f'https://comment.bilibili.com/{cid}.xml', - }] + }], } - subtitle_info = traverse_obj(self._download_json( + video_info = self._download_json( 'https://api.bilibili.com/x/player/v2', video_id, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, - note=f'Extracting subtitle info {cid}'), ('data', 'subtitle')) - subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan'])) - if not subs_list and traverse_obj(subtitle_info, 'allow_submit'): - if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie - self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True) - for s in subs_list: + note=f'Extracting subtitle info {cid}') + if traverse_obj(video_info, ('data', 'need_login_subtitle')): + self.report_warning( + f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True) + for s in traverse_obj(video_info, ( + 'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])): subtitles.setdefault(s['lan'], []).append({ 'ext': 'srt', - 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) + 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)), }) return subtitles @@ -203,19 +277,19 @@ class BilibiliBaseIE(InfoExtractor): self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges) return cid_edges - def _get_interactive_entries(self, video_id, cid, metainfo): + def _get_interactive_entries(self, video_id, cid, metainfo, headers=None): graph_version = traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/wbi/v2', video_id, - 'Extracting graph version', query={'bvid': video_id, 'cid': cid}), + 'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers), ('data', 'interaction', 'graph_version', {int_or_none})) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) for cid, edges in cid_edges.items(): - play_info = self._download_playinfo(video_id, cid) + play_info = self._download_playinfo(video_id, cid, headers=headers) yield { **metainfo, 'id': f'{video_id}_{cid}', - 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}', + 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}', 'formats': self.extract_formats(play_info), 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}', 'duration': float_or_none(play_info.get('timelength'), scale=1000), @@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE): 'timestamp': 1488353834, 'like_count': int, 'view_count': int, + '_old_archive_ids': ['bilibili 8903802_part1'], }, }, { 'note': 'old av URL version', 'url': 'http://www.bilibili.com/video/av1074402/', 'info_dict': { - 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', + 'id': 'BV11x411K7CN', 'ext': 'mp4', + 'title': '【金坷垃】金泡沫', 'uploader': '菊子桑', 'uploader_id': '156160', - 'id': 'BV11x411K7CN', - 'title': '【金坷垃】金泡沫', 'duration': 308.36, 'upload_date': '20140420', 'timestamp': 1397983878, @@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE): 'comment_count': int, 'view_count': int, 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', + '_old_archive_ids': ['bilibili 1074402_part1'], }, 'params': {'skip_download': True}, }, { @@ -269,7 +345,7 @@ class BiliBiliIE(BilibiliBaseIE): 'url': 'https://www.bilibili.com/video/BV1bK411W797', 'info_dict': { 'id': 'BV1bK411W797', - 'title': '物语中的人物是如何吐槽自己的OP的' + 'title': '物语中的人物是如何吐槽自己的OP的', }, 'playlist_count': 18, 'playlist': [{ @@ -288,8 +364,9 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }] + '_old_archive_ids': ['bilibili 498159642_part1'], + }, + }], }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }, { - 'note': 'video has subtitles', - 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', - 'info_dict': { - 'id': 'BV12N4y1M7rh', - 'ext': 'mp4', - 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', - 'tags': list, - 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', - 'duration': 313.557, - 'upload_date': '20220709', - 'uploader': '小夫太渴', - 'timestamp': 1657347907, - 'uploader_id': '1326814124', - 'comment_count': int, - 'view_count': int, - 'like_count': int, - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', - 'subtitles': 'count:2' + '_old_archive_ids': ['bilibili 498159642_part1'], }, - 'params': {'listsubtitles': True}, }, { 'url': 'https://www.bilibili.com/video/av8903802/', 'info_dict': { @@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE): 'comment_count': int, 'view_count': int, 'like_count': int, + '_old_archive_ids': ['bilibili 8903802_part1'], }, 'params': { 'skip_download': True, @@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 463665680_part1'], }, 'params': {'skip_download': True}, }, { @@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 893839363_part1'], }, - 'params': {'skip_download': True}, }, { 'note': 'newer festival video', 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f', @@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 778246196_part1'], + }, + }, { + 'note': 'legacy flv/mp4 video', + 'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4', + 'info_dict': { + 'id': 'BV1ms411Q7vw_p4', + 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛', + 'timestamp': 1458222815, + 'upload_date': '20160317', + 'description': '云南方言快乐生产线出品', + 'duration': float, + 'uploader': '一笑颠天', + 'uploader_id': '3916081', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 4120229_part4'], + }, + 'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}}, + 'playlist_count': 19, + 'playlist': [{ + 'info_dict': { + 'id': 'BV1ms411Q7vw_p4_0', + 'ext': 'flv', + 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛', + 'duration': 399.102, + }, + }], + }, { + 'note': 'legacy mp4-only video', + 'url': 'https://www.bilibili.com/video/BV1nx411u79K', + 'info_dict': { + 'id': 'BV1nx411u79K', + 'ext': 'mp4', + 'title': '【练习室】201603声乐练习《No Air》with VigoVan', + 'timestamp': 1508893551, + 'upload_date': '20171025', + 'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van', + 'duration': 80.384, + 'uploader': '伯远', + 'uploader_id': '10584494', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 15700301_part1'], }, - 'params': {'skip_download': True}, }, { 'note': 'interactive/split-path video', 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/', @@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 292734508_part1'], }, 'playlist_count': 33, 'playlist': [{ @@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 292734508_part1'], }, }], }, { @@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE): 'upload_date': '20191021', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, + }, { + 'note': 'video has subtitles, which requires login', + 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', + 'info_dict': { + 'id': 'BV12N4y1M7rh', + 'ext': 'mp4', + 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', + 'tags': list, + 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', + 'duration': 313.557, + 'upload_date': '20220709', + 'uploader': '小夫太渴', + 'timestamp': 1657347907, + 'uploader_id': '1326814124', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'subtitles': 'count:2', # login required for CC subtitle + '_old_archive_ids': ['bilibili 898179753_part1'], + }, + 'params': {'listsubtitles': True}, + 'skip': 'login required for subtitle', }, { 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/', 'info_dict': { @@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE): if not self._match_valid_url(urlh.url): return self.url_result(urlh.url) - initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + headers['Referer'] = url + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) is_festival = 'videoData' not in initial_state if is_festival: video_data = initial_state['videoInfo'] @@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE): aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') - cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') festival_info = {} @@ -586,19 +719,65 @@ class BiliBiliIE(BilibiliBaseIE): is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate')) if is_interactive: return self.playlist_result( - self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{ - 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), - '__post_extractor': self.extract_comments(aid), - }) + self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo, + duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), + __post_extractor=self.extract_comments(aid)) else: - return { - **metainfo, - 'duration': float_or_none(play_info.get('timelength'), scale=1000), - 'chapters': self._get_chapters(aid, cid), - 'subtitles': self.extract_subtitles(video_id, cid), - 'formats': self.extract_formats(play_info), - '__post_extractor': self.extract_comments(aid), - } + formats = self.extract_formats(play_info) + + if not traverse_obj(play_info, ('dash')): + # we only have legacy formats and need additional work + has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) + for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): + formats.extend(traverse_obj( + self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)), + lambda _, v: not has_qn(v['quality']))) + self._check_missing_formats(play_info, formats) + flv_formats = traverse_obj(formats, lambda _, v: v['fragments']) + if flv_formats and len(flv_formats) < len(formats): + # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one + if not self._configuration_arg('prefer_multi_flv'): + dropped_fmts = ', '.join( + f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats) + formats = traverse_obj(formats, lambda _, v: not v.get('fragments')) + if dropped_fmts: + self.to_screen( + f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. ' + 'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"') + else: + formats = traverse_obj( + # XXX: Filtering by extractor-arg is for testing purposes + formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]), + ) or [max(flv_formats, key=lambda x: x['quality'])] + + if traverse_obj(formats, (0, 'fragments')): + # We have flv formats, which are individual short videos with their own timestamps and metainfo + # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround + return { + **metainfo, + '_type': 'multi_video', + 'entries': [{ + 'id': f'{metainfo["id"]}_{idx}', + 'title': metainfo['title'], + 'http_headers': metainfo['http_headers'], + 'formats': [{ + **fragment, + 'format_id': formats[0].get('format_id'), + }], + 'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None, + '__post_extractor': self.extract_comments(aid) if idx == 0 else None, + } for idx, fragment in enumerate(formats[0]['fragments'])], + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + } + else: + return { + **metainfo, + 'formats': formats, + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + 'chapters': self._get_chapters(aid, cid), + 'subtitles': self.extract_subtitles(video_id, cid), + '__post_extractor': self.extract_comments(aid), + } class BiliBiliBangumiIE(BilibiliBaseIE): @@ -640,7 +819,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1425.256, 'timestamp': 1554566400, 'upload_date': '20190406', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'skip': 'Geo-restricted', }, { @@ -661,7 +840,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1922.129, 'timestamp': 1602853860, 'upload_date': '20201016', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }] @@ -764,7 +943,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'duration': 1525.777, 'timestamp': 1425074413, 'upload_date': '20150227', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -794,7 +973,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'title': '鬼灭之刃', 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b', }, - 'playlist_mincount': 26 + 'playlist_mincount': 26, }, { 'url': 'https://www.bilibili.com/bangumi/play/ss2251', 'info_dict': { @@ -819,7 +998,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'duration': 1436.992, 'timestamp': 1343185080, 'upload_date': '20120725', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -906,7 +1085,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -939,7 +1118,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }], 'params': {'playlist_items': '1'}, }, { @@ -969,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE): })) -class BilibiliSpaceBaseIE(InfoExtractor): +class BilibiliSpaceBaseIE(BilibiliBaseIE): def _extract_playlist(self, fetch_page, get_metadata, get_entries): first_page = fetch_page(0) metadata = get_metadata(first_page) @@ -989,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): 'id': '3985676', }, 'playlist_mincount': 178, + 'skip': 'login required', }, { 'url': 'https://space.bilibili.com/313580179/video', 'info_dict': { 'id': '313580179', }, 'playlist_mincount': 92, + 'skip': 'login required', }] - def _extract_signature(self, playlist_id): - session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False) - - key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0] - img_key = traverse_obj( - session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100' - sub_key = traverse_obj( - session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6' - - session_key = img_key + sub_key - - signature_values = [] - for position in ( - 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, - 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, - 57, 62, 11, 36, 20, 34, 44, 52 - ): - char_at_position = try_call(lambda: session_key[position]) - if char_at_position: - signature_values.append(char_at_position) - - return ''.join(signature_values)[:32] - def _real_extract(self, url): playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video') if not is_video_url: self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. ' 'To download audios, add a "/audio" to the URL') - signature = self._extract_signature(playlist_id) - def fetch_page(page_idx): query = { 'keyword': '', 'mid': playlist_id, - 'order': 'pubdate', + 'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate', 'order_avoided': 'true', 'platform': 'web', 'pn': page_idx + 1, 'ps': 30, 'tid': 0, 'web_location': 1550101, - 'wts': int(time.time()), } - query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest() try: - response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search', - playlist_id, note=f'Downloading page {page_idx}', query=query, - headers={'referer': url}) + response = self._download_json( + 'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id, + query=self._sign_wbi(query, playlist_id), + note=f'Downloading space page {page_idx}', headers={'Referer': url}) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 412: raise ExtractorError( 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True) raise - if response['code'] in (-352, -401): + status_code = response['code'] + if status_code == -401: raise ExtractorError( - f'Request is blocked by server ({-response["code"]}), ' - 'please add cookies, wait and try later.', expected=True) + 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True) + elif status_code == -352 and not self.is_logged_in: + self.raise_login_required('Request is rejected, you need to login to access playlist') + elif status_code != 0: + raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}') return response['data'] def get_metadata(page_data): @@ -1163,7 +1322,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE): 'uploader_id': ('meta', 'mid', {str_or_none}), 'timestamp': ('meta', 'ptime', {int_or_none}), 'thumbnail': ('meta', 'cover', {url_or_none}), - }) + }), } def get_entries(page_data): @@ -1195,7 +1354,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE): mid, sid = self._match_valid_url(url).group('mid', 'sid') playlist_id = f'{mid}_{sid}' playlist_meta = traverse_obj(self._download_json( - f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False + f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False, ), { 'title': ('data', 'meta', 'name', {str}), 'description': ('data', 'meta', 'description', {str}), @@ -1217,7 +1376,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE): 'page_count': math.ceil(entry_count / page_size), 'page_size': page_size, 'uploader': self._get_uploader(mid, playlist_id), - **playlist_meta + **playlist_meta, } def get_entries(page_data): @@ -1241,7 +1400,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE): 'upload_date': '20201109', 'modified_timestamp': int, 'modified_date': str, - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', 'view_count': int, 'like_count': int, }, @@ -1281,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE): _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://www.bilibili.com/watchlater/#/list', - 'info_dict': {'id': 'watchlater'}, + 'info_dict': { + 'id': r're:\d+', + 'title': '稍后再看', + }, 'playlist_mincount': 0, 'skip': 'login required', }] @@ -1345,7 +1507,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'uploader_id': '84912', 'timestamp': 1604905176, 'upload_date': '20201109', - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', }, 'playlist_mincount': 22, }, { @@ -1357,21 +1519,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'skip': 'redirect url', }, { 'url': 'https://www.bilibili.com/list/watchlater', - 'info_dict': {'id': 'watchlater'}, + 'info_dict': { + 'id': r're:2_\d+', + 'title': '稍后再看', + 'uploader': str, + 'uploader_id': str, + }, 'playlist_mincount': 0, 'skip': 'login required', }, { 'url': 'https://www.bilibili.com/medialist/play/watchlater', 'info_dict': {'id': 'watchlater'}, 'playlist_mincount': 0, - 'skip': 'login required', + 'skip': 'redirect url & login required', }] def _extract_medialist(self, query, list_id): for page_num in itertools.count(1): page_data = self._download_json( 'https://api.bilibili.com/x/v2/medialist/resource/list', - list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}' + list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}', )['data'] yield from self._get_entries(page_data, 'media_list', ending_key='bv_id') query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id')) @@ -1407,7 +1574,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'tid': ('tid', {int_or_none}), 'sort_field': ('sortFiled', {int_or_none}), 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}), - }) + }), } metadata = { 'id': f'{query["type"]}_{query["biz_id"]}', @@ -1415,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'title': ('title', {str}), 'uploader': ('upper', 'name', {str}), 'uploader_id': ('upper', 'mid', {str_or_none}), - 'timestamp': ('ctime', {int_or_none}), + 'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}), 'thumbnail': ('cover', {url_or_none}), })), } @@ -1430,26 +1597,26 @@ class BilibiliCategoryIE(InfoExtractor): 'url': 'https://www.bilibili.com/v/kichiku/mad', 'info_dict': { 'id': 'kichiku: mad', - 'title': 'kichiku: mad' + 'title': 'kichiku: mad', }, 'playlist_mincount': 45, 'params': { - 'playlistend': 45 - } + 'playlistend': 45, + }, }] def _fetch_page(self, api_url, num_pages, query, page_num): parsed_json = self._download_json( api_url, query, query={'Search_key': query, 'pn': page_num}, - note='Extracting results from page %s of %s' % (page_num, num_pages)) + note=f'Extracting results from page {page_num} of {num_pages}') video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list) if not video_list: - raise ExtractorError('Failed to retrieve video list for page %d' % page_num) + raise ExtractorError(f'Failed to retrieve video list for page {page_num}') for video in video_list: yield self.url_result( - 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid']) + 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid']) def _entries(self, category, subcategory, query): # map of categories : subcategories : RIDs @@ -1459,7 +1626,7 @@ class BilibiliCategoryIE(InfoExtractor): 'manual_vocaloid': 126, 'guide': 22, 'theatre': 216, - 'course': 127 + 'course': 127, }, } @@ -1485,7 +1652,7 @@ class BilibiliCategoryIE(InfoExtractor): def _real_extract(self, url): category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4] - query = '%s: %s' % (category, subcategory) + query = f'{category}: {subcategory}' return self.playlist_result(self._entries(category, subcategory, query), query, query) @@ -1588,7 +1755,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): formats = [{ 'url': play_data['cdns'][0], 'filesize': int_or_none(play_data.get('size')), - 'vcodec': 'none' + 'vcodec': 'none', }] for a_format in formats: @@ -1606,7 +1773,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): subtitles = { 'origin': [{ 'url': lyric, - }] + }], } return { @@ -1674,7 +1841,7 @@ class BiliBiliPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) return self.url_result( - 'http://www.bilibili.tv/video/av%s/' % video_id, + f'http://www.bilibili.tv/video/av{video_id}/', ie=BiliBiliIE.ie_key(), video_id=video_id) @@ -1702,11 +1869,10 @@ class BiliIntlBaseIE(InfoExtractor): return json.get('data') def json2srt(self, json): - data = '\n\n'.join( + return '\n\n'.join( f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}' for i, line in enumerate(traverse_obj(json, ( 'body', lambda _, l: l['content'] and l['from'] and l['to'])))) - return data def _get_subtitles(self, *, ep_id=None, aid=None): sub_json = self._call_api( @@ -1808,14 +1974,15 @@ class BiliIntlBaseIE(InfoExtractor): note='Downloading login key', errnote='Unable to download login key')['data'] public_key = Cryptodome.RSA.importKey(key_data['key']) - password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) + password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode()) login_post = self._download_json( - 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ + 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, + data=urlencode_postdata({ 'username': username, 'password': base64.b64encode(password_hash).decode('ascii'), 'keep_me': 'true', 's_locale': 'en_US', - 'isTrusted': 'true' + 'isTrusted': 'true', }), note='Logging in', errnote='Unable to log in') if login_post.get('code'): if login_post.get('message'): @@ -1842,17 +2009,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 76.242, - 'title': '' + 'title': '', }, { 'start_time': 76.242, 'end_time': 161.161, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1325.742, 'end_time': 1403.903, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Non-Bstation page 'url': 'https://www.bilibili.tv/en/play/1033760/11005006', @@ -1869,17 +2036,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 88.0, - 'title': '' + 'title': '', }, { 'start_time': 88.0, 'end_time': 156.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1173.0, 'end_time': 1259.535, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Subtitle with empty content 'url': 'https://www.bilibili.tv/en/play/1005144/10131790', @@ -1890,7 +2057,7 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 140, }, - 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' + 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.', }, { # episode comment extraction 'url': 'https://www.bilibili.tv/en/play/34580/340317', @@ -1908,20 +2075,20 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 61.0, - 'title': '' + 'title': '', }, { 'start_time': 61.0, 'end_time': 134.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1290.0, 'end_time': 1379.0, - 'title': 'Outro' + 'title': 'Outro', }], }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # user generated content comment extraction 'url': 'https://www.bilibili.tv/en/video/2045730385', @@ -1936,8 +2103,8 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg', }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # episode id without intro and outro 'url': 'https://www.bilibili.tv/en/play/1048837/11246489', @@ -1992,7 +2159,7 @@ class BiliIntlIE(BiliIntlBaseIE): # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) video_data = traverse_obj(season_json, ( - 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id + 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id, ), expected_type=dict, get_all=False) # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found @@ -2024,7 +2191,7 @@ class BiliIntlIE(BiliIntlBaseIE): 'id': replies.get('rpid'), 'like_count': int_or_none(replies.get('like_count')), 'parent': replies.get('parent'), - 'timestamp': unified_timestamp(replies.get('ctime_text')) + 'timestamp': unified_timestamp(replies.get('ctime_text')), } if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')): @@ -2077,11 +2244,11 @@ class BiliIntlIE(BiliIntlBaseIE): chapters = [{ 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000), - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000), - 'title': 'Outro' + 'title': 'Outro', }] return { @@ -2137,12 +2304,13 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): episode_id = str(episode['episode_id']) yield self.url_result(smuggle_url( BiliIntlIE._make_url(episode_id, series_id), - self._parse_video_metadata(episode) + self._parse_video_metadata(episode), ), BiliIntlIE, episode_id) def _real_extract(self, url): series_id = self._match_id(url) - series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {} + series_info = self._call_api( + f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {} return self.playlist_result( self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'), categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none), @@ -2156,19 +2324,19 @@ class BiliLiveIE(InfoExtractor): 'url': 'https://live.bilibili.com/196', 'info_dict': { 'id': '33989', - 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)", + 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)', 'ext': 'flv', - 'title': "太空狼人杀联动,不被爆杀就算赢", - 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg", + 'title': '太空狼人杀联动,不被爆杀就算赢', + 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg', 'timestamp': 1650802769, }, - 'skip': 'not live' + 'skip': 'not live', }, { 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://live.bilibili.com/blanc/196', - 'only_matching': True + 'only_matching': True, }] _FORMATS = { @@ -2209,7 +2377,7 @@ class BiliLiveIE(InfoExtractor): raise ExtractorError('Streamer is not live', expected=True) formats = [] - for qn in self._FORMATS.keys(): + for qn in self._FORMATS: stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, { 'room_id': room_id, 'qn': qn, diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index 194bf1f..c83222e 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -24,7 +24,7 @@ from ..utils import ( class BitChuteIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P[^/?#&]+)' _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P{_VALID_URL})'] _TESTS = [{ 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', @@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20170103', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, }, { # test case: video with different channel and uploader @@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20231106', 'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/', 'channel': 'Full Measure with Sharyl Attkisson', - 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/' + 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/', }, }, { # video not downloadable in browser, but we can recover it @@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20181113', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, 'params': {'check_formats': None}, }, { @@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor): }, { 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', 'only_matching': True, + }, { + 'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/', + 'only_matching': True, }] _GEO_BYPASS = False @@ -115,7 +118,7 @@ class BitChuteIE(InfoExtractor): continue return { 'url': url, - 'filesize': int_or_none(response.headers.get('Content-Length')) + 'filesize': int_or_none(response.headers.get('Content-Length')), } def _raise_if_restricted(self, webpage): @@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) + f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) self._raise_if_restricted(webpage) publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) @@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor): class BitChuteChannelIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?Pchannel|playlist)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?Pchannel|playlist)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bitchute.com/channel/bitchute/', 'info_dict': { 'id': 'bitchute', 'title': 'BitChute', - 'description': 'md5:5329fb3866125afa9446835594a9b138', + 'description': 'md5:2134c37d64fc3a4846787c402956adac', }, 'playlist': [ { @@ -196,7 +199,7 @@ class BitChuteChannelIE(InfoExtractor): 'duration': 16, 'view_count': int, }, - } + }, ], 'params': { 'skip_download': True, @@ -209,7 +212,10 @@ class BitChuteChannelIE(InfoExtractor): 'id': 'wV9Imujxasw9', 'title': 'Bruce MacDonald and "The Light of Darkness"', 'description': 'md5:747724ef404eebdfc04277714f81863e', - } + }, + }, { + 'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/', + 'only_matching': True, }] _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' @@ -224,13 +230,13 @@ class BitChuteChannelIE(InfoExtractor): 'container': 'playlist-video', 'title': 'title', 'description': 'description', - } + }, } @staticmethod def _make_url(playlist_id, playlist_type): - return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/' + return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/' def _fetch_page(self, playlist_id, playlist_type, page_num): playlist_url = self._make_url(playlist_id, playlist_type) diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 8f41c89..5358909 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor): region = mobj.group('region') video_id = mobj.group('id') info = self._download_json( - 'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id) + f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) duration = info.get('duration') title = info['name'] upload_date = info.get('created') diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index aa3d63e..71b237d 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor): def _real_extract(self, url): article_id = self._match_id(url) - article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] + article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article'] thumbnails = [] primary_photo = article_data.get('primaryPhoto') @@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor): if video: video_type = video['type'] if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): - info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] + info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id']) elif video_type == 'youtube.com': info['url'] = video['id'] elif video_type == 'vine.co': - info['url'] = 'https://vine.co/v/%s' % video['id'] + info['url'] = 'https://vine.co/v/{}'.format(video['id']) else: info['url'] = video_type + video['id'] return info @@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE): }, 'expected_warnings': [ - 'Unable to download f4m manifest' - ] + 'Unable to download f4m manifest', + ], }] def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) + info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai') info['id'] = video_id return info diff --git a/yt_dlp/extractor/blerp.py b/yt_dlp/extractor/blerp.py index 4631ad2..f4f2248 100644 --- a/yt_dlp/extractor/blerp.py +++ b/yt_dlp/extractor/blerp.py @@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor): 'uploader_id': '5fb81e51aa66ae000c395478', 'ext': 'mp3', 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'], - } + }, }, { 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f', 'info_dict': { @@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor): 'uploader': '179617322678353920', 'uploader_id': '5ba99cf71386730004552c42', 'ext': 'mp3', - 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'] - } + 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'], + }, }] - _GRAPHQL_OPERATIONNAME = "webBitePageGetBite" + _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite' _GRAPHQL_QUERY = ( '''query webBitePageGetBite($_id: MongoID!) { web { @@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor): 'operationName': self._GRAPHQL_OPERATIONNAME, 'query': self._GRAPHQL_QUERY, 'variables': { - '_id': audio_id - } + '_id': audio_id, + }, } headers = { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', } - json_result = self._download_json('https://api.blerp.com/graphql', - audio_id, data=json.dumps(data).encode('utf-8'), headers=headers) + json_result = self._download_json( + 'https://api.blerp.com/graphql', audio_id, + data=json.dumps(data).encode(), headers=headers) bite_json = json_result['data']['web']['biteById'] - info_dict = { + return { 'id': bite_json['_id'], 'url': bite_json['audio']['mp3']['url'], 'title': bite_json['title'], 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none), 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none), 'ext': 'mp3', - 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None) + 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None), } - - return info_dict diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index ef0151d..1614b6f 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': r're:^https?://.*', 'duration': 76.068, - } + }, }] def _real_extract(self, url): token_id = self._match_id(url) webpage = self._download_webpage(url, token_id) data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') - data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id) streams = data['streams'] formats = [{ 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))), diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index 792155e..ec6b7a8 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor): title = re.sub(': Video$', '', self._og_search_title(webpage)) embed_info = self._download_json( - 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id) + f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id) formats = [] for stream in embed_info['streams']: stream_url = stream.get('url') diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index ca326f2..5fe937a 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ExtractorError @@ -9,20 +10,18 @@ class BokeCCBaseIE(InfoExtractor): r'<(?:script|embed)[^>]+src=(?P["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P.+?)(?P=q)', webpage, 'player params', group='query') - player_params = compat_parse_qs(player_params_str) + player_params = urllib.parse.parse_qs(player_params_str) info_xml = self._download_xml( - 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( + 'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format( player_params['siteid'][0], player_params['vid'][0]), video_id) - formats = [{ + return [{ 'format_id': format_id, 'url': quality.find('./copy').attrib['playurl'], 'quality': int(quality.attrib['value']), } for quality in info_xml.findall('./video/quality')] - return formats - class BokeCCIE(BokeCCBaseIE): _IE_DESC = 'CC视频' @@ -38,11 +37,11 @@ class BokeCCIE(BokeCCBaseIE): }] def _real_extract(self, url): - qs = compat_parse_qs(self._match_valid_url(url).group('query')) + qs = urllib.parse.parse_qs(self._match_valid_url(url).group('query')) if not qs.get('vid') or not qs.get('uid'): raise ExtractorError('Invalid URL', expected=True) - video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0]) + video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0]) webpage = self._download_webpage(url, video_id) diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index bf95566..ab85477 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, try_get, @@ -38,7 +37,7 @@ class BongaCamsIE(InfoExtractor): channel_id = mobj.group('id') amf = self._download_json( - 'https://%s/tools/amf.php' % host, channel_id, + f'https://{host}/tools/amf.php', channel_id, data=urlencode_postdata(( ('method', 'getRoomData'), ('args[]', channel_id), @@ -48,14 +47,14 @@ class BongaCamsIE(InfoExtractor): server_url = amf['localData']['videoServerUrl'] uploader_id = try_get( - amf, lambda x: x['performerData']['username'], compat_str) or channel_id + amf, lambda x: x['performerData']['username'], str) or channel_id uploader = try_get( - amf, lambda x: x['performerData']['displayName'], compat_str) + amf, lambda x: x['performerData']['displayName'], str) like_count = int_or_none(try_get( amf, lambda x: x['performerData']['loversCount'])) formats = self._extract_m3u8_formats( - '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), + f'{server_url}/hls/stream_{uploader_id}/playlist.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True) return { diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 2675866..f5b8196 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -57,8 +57,7 @@ class BostonGlobeIE(InfoExtractor): if video_id and account_id and player_id and embed: entries.append( - 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - % (account_id, player_id, embed, video_id)) + f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}') if len(entries) == 0: return self.url_result(url, 'Generic') diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 008c011..3547ad9 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -72,20 +72,20 @@ class BoxIE(InfoExtractor): 'BoxApi': 'shared_link=' + shared_link, 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats }, query={ - 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size' + 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size', }) title = f['name'] query = { 'access_token': access_token, - 'shared_link': shared_link + 'shared_link': shared_link, } formats = [] for url_tmpl in traverse_obj(f, ( 'representations', 'entries', lambda _, v: v['representation'] == 'dash', - 'content', 'url_template', {url_or_none} + 'content', 'url_template', {url_or_none}, )): manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query) fmts = self._extract_mpd_formats(manifest_url, file_id) diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py index da06cc3..efa6699 100644 --- a/yt_dlp/extractor/boxcast.py +++ b/yt_dlp/extractor/boxcast.py @@ -21,7 +21,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }, { 'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad', 'info_dict': { @@ -30,8 +30,8 @@ class BoxCastVideoIE(InfoExtractor): 'uploader_id': 'vctwevwntun3o0ikq7af', 'uploader': 'Legacy Christian Church', 'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools', - 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg' - } + 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg', + }, }, { 'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev', 'info_dict': { @@ -44,7 +44,7 @@ class BoxCastVideoIE(InfoExtractor): 'uploader': 'Lighthouse Ministries International - Beltsville, Maryland', 'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340', 'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022', - } + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://childrenshealthdefense.eu/live-stream/', @@ -57,7 +57,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 6e1c63e..0568e06 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -61,7 +61,7 @@ class BRIE(InfoExtractor): 'title': 'Umweltbewusster Häuslebauer', 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2', 'duration': 116, - } + }, }, { 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', @@ -74,7 +74,7 @@ class BRIE(InfoExtractor): 'duration': 893, 'uploader': 'Eva Maria Steimle', 'upload_date': '20170208', - } + }, }, ] @@ -142,7 +142,7 @@ class BRIE(InfoExtractor): http_format_info = format_info.copy() http_format_info.update({ 'url': format_url, - 'format_id': 'http-%s' % asset_type, + 'format_id': f'http-{asset_type}', }) formats.append(http_format_info) server_prefix = xpath_text(asset, 'serverPrefix') @@ -151,7 +151,7 @@ class BRIE(InfoExtractor): rtmp_format_info.update({ 'url': server_prefix, 'play_path': xpath_text(asset, 'fileName'), - 'format_id': 'rtmp-%s' % asset_type, + 'format_id': f'rtmp-{asset_type}', }) formats.append(rtmp_format_info) return formats diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py index 04b1dd8..df10299 100644 --- a/yt_dlp/extractor/brainpop.py +++ b/yt_dlp/extractor/brainpop.py @@ -52,8 +52,8 @@ class BrainPOPBaseIE(InfoExtractor): '%s': {}, 'ad_%s': { 'format_note': 'Audio description', - 'source_preference': -2 - } + 'source_preference': -2, + }, } for additional_key_format, additional_key_fields in additional_key_formats.items(): for key_quality, key_index in enumerate(('high', 'low')): @@ -62,7 +62,7 @@ class BrainPOPBaseIE(InfoExtractor): formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, { 'quality': -1 - key_quality, **additional_key_fields, - **extra_fields + **extra_fields, })) return formats @@ -72,7 +72,7 @@ class BrainPOPBaseIE(InfoExtractor): data=json.dumps({'username': username, 'password': password}).encode(), headers={ 'Content-Type': 'application/json', - 'Referer': self._ORIGIN + 'Referer': self._ORIGIN, }, note='Logging in', errnote='Unable to log in', expected_status=400) status_code = int_or_none(login_res['status_code']) if status_code != 1505: @@ -131,12 +131,12 @@ class BrainPOPIE(BrainPOPBaseIE): formats, subtitles = [], {} formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', { 'language': movie_feature.get('language') or 'en', - 'language_preference': 10 + 'language_preference': 10, })) for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items(): formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', { 'language': lang, - 'language_preference': -10 + 'language_preference': -10, })) # TODO: Do localization fields also have subtitles? @@ -145,7 +145,7 @@ class BrainPOPIE(BrainPOPBaseIE): r'^subtitles_(?P\w+)$', name, 'subtitle metadata', default=None) if lang and url: subtitles.setdefault(lang, []).append({ - 'url': urljoin(self._CDN_URL, url) + 'url': urljoin(self._CDN_URL, url), }) return { diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py index 419fe8c..ec72f0d 100644 --- a/yt_dlp/extractor/bravotv.py +++ b/yt_dlp/extractor/bravotv.py @@ -185,5 +185,5 @@ class BravoTVIE(AdobePassIE): 'episode_number': ('episodeNumber', {int_or_none}), 'episode': 'episodeTitle', 'series': 'show', - })) + })), } diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index b5abb7f..fedf477 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -13,7 +13,7 @@ class BreitBartIE(InfoExtractor): 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5', 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg', 'age_limit': 0, - } + }, }, { 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/', 'only_matching': True, @@ -30,5 +30,5 @@ class BreitBartIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'age_limit': self._rta_search(webpage), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 4190e1a..2526f25 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -1,15 +1,12 @@ import base64 import re import struct +import urllib.parse import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_parse_qs, - compat_urlparse, -) +from ..compat import compat_etree_fromstring from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -21,6 +18,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, int_or_none, + join_nonempty, js_to_json, mimetype2ext, parse_iso8601, @@ -142,7 +140,7 @@ class BrightcoveLegacyIE(InfoExtractor): # from http://www.un.org/chinese/News/story.asp?NewsID=27724 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', 'only_matching': True, # Tested in GenericIE - } + }, ] _WEBPAGE_TESTS = [{ @@ -315,7 +313,7 @@ class BrightcoveLegacyIE(InfoExtractor): object_str = fix_xml_ampersands(object_str) try: - object_doc = compat_etree_fromstring(object_str.encode('utf-8')) + object_doc = compat_etree_fromstring(object_str.encode()) except xml.etree.ElementTree.ParseError: return @@ -323,7 +321,7 @@ class BrightcoveLegacyIE(InfoExtractor): if fv_el is not None: flashvars = dict( (k, v[0]) - for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items()) else: flashvars = {} @@ -340,32 +338,32 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') or find_param('playerId') - if playerID is None: + player_id = find_param('playerID') or find_param('playerId') + if player_id is None: raise ExtractorError('Cannot find player ID') - params['playerID'] = playerID + params['playerID'] = player_id - playerKey = find_param('playerKey') + player_key = find_param('playerKey') # Not all pages define this value - if playerKey is not None: - params['playerKey'] = playerKey + if player_key is not None: + params['playerKey'] = player_key # These fields hold the id of the video - videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') - if videoPlayer is not None: - if isinstance(videoPlayer, list): - videoPlayer = videoPlayer[0] - videoPlayer = videoPlayer.strip() + video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') + if video_player is not None: + if isinstance(video_player, list): + video_player = video_player[0] + video_player = video_player.strip() # UUID is also possible for videoPlayer (e.g. # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd # or http://www8.hp.com/cn/zh/home.html) if not (re.match( r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', - videoPlayer) or videoPlayer.startswith('ref:')): + video_player) or video_player.startswith('ref:')): return None - params['@videoPlayer'] = videoPlayer - linkBase = find_param('linkBaseURL') - if linkBase is not None: - params['linkBaseURL'] = linkBase + params['@videoPlayer'] = video_player + link_base = find_param('linkBaseURL') + if link_base is not None: + params['linkBaseURL'] = link_base return cls._make_brightcove_url(params) @classmethod @@ -389,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor): @classmethod def _make_brightcove_url(cls, params): return update_url_query( - 'http://c.brightcove.com/services/viewer/htmlFederated', params) + 'https://c.brightcove.com/services/viewer/htmlFederated', params) @classmethod def _extract_brightcove_url(cls, webpage): @@ -448,13 +446,13 @@ class BrightcoveLegacyIE(InfoExtractor): url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = self._match_valid_url(url) query_str = mobj.group('query') - query = compat_urlparse.parse_qs(query_str) + query = urllib.parse.parse_qs(query_str) - videoPlayer = query.get('@videoPlayer') - if videoPlayer: + video_player = query.get('@videoPlayer') + if video_player: # We set the original url as the default 'Referer' header referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url) - video_id = videoPlayer[0] + video_id = video_player[0] if 'playerID' not in query: mobj = re.search(r'/bcpid(\d+)', url) if mobj is not None: @@ -473,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor): if referer: headers['Referer'] = referer player_page = self._download_webpage( - 'http://link.brightcove.com/services/player/bcpid' + player_id[0], + 'https://link.brightcove.com/services/player/bcpid' + player_id[0], video_id, headers=headers, fatal=False) if player_page: player_key = self._search_regex( @@ -483,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor): enc_pub_id = player_key.split(',')[1].replace('~', '=') publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] if publisher_id: - brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}' if referer: brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) @@ -541,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE): }) def build_format_id(kind): - format_id = kind - if tbr: - format_id += '-%dk' % int(tbr) - if height: - format_id += '-%dp' % height - return format_id + return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p') if src or streaming_src: f.update({ @@ -654,7 +647,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # playlist stream 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', @@ -666,7 +659,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', 'only_matching': True, @@ -804,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): # Look for iframe embeds [1] for _, url in re.findall( r']+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): - entries.append(url if url.startswith('http') else 'http:' + url) + entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url) # Look for