From 137f07c27a88b07df57df5b2cdebebe3c7722bac Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 5 Aug 2024 11:06:10 +0200
Subject: Merging upstream version 2024.07.01.

Signed-off-by: Daniel Baumann
---
 yt_dlp/extractor/_extractors.py | 29 +- yt_dlp/extractor/abc.py | 28 +- yt_dlp/extractor/abcnews.py | 2 +- yt_dlp/extractor/abcotvs.py | 5 +- yt_dlp/extractor/abematv.py | 22 +- yt_dlp/extractor/acast.py | 8 +- yt_dlp/extractor/acfun.py | 4 +- yt_dlp/extractor/adn.py | 19 +- yt_dlp/extractor/adobeconnect.py | 10 +- yt_dlp/extractor/adobepass.py | 866 ++++++++++++++-------------- yt_dlp/extractor/adobetv.py | 5 +- yt_dlp/extractor/adultswim.py | 6 +- yt_dlp/extractor/aenetworks.py | 34 +- yt_dlp/extractor/aeonco.py | 8 +- yt_dlp/extractor/afreecatv.py | 45 +- yt_dlp/extractor/agora.py | 6 +- yt_dlp/extractor/airtv.py | 4 +- yt_dlp/extractor/aitube.py | 2 +- yt_dlp/extractor/aliexpress.py | 3 +- yt_dlp/extractor/aljazeera.py | 14 +- yt_dlp/extractor/allocine.py | 5 +- yt_dlp/extractor/allstar.py | 26 +- yt_dlp/extractor/alphaporno.py | 2 +- yt_dlp/extractor/alsace20tv.py | 6 +- yt_dlp/extractor/altcensored.py | 2 +- yt_dlp/extractor/alura.py | 16 +- yt_dlp/extractor/amadeustv.py | 2 +- yt_dlp/extractor/amara.py | 10 +- yt_dlp/extractor/amazon.py | 8 +- yt_dlp/extractor/amazonminitv.py | 2 +- yt_dlp/extractor/amcnetworks.py | 12 +- yt_dlp/extractor/americastestkitchen.py | 14 +- yt_dlp/extractor/amp.py | 4 +- yt_dlp/extractor/anchorfm.py | 6 +- yt_dlp/extractor/angel.py | 10 +- yt_dlp/extractor/antenna.py | 2 +- yt_dlp/extractor/anvato.py | 8 +- yt_dlp/extractor/aol.py | 8 +- yt_dlp/extractor/apa.py | 4 +- yt_dlp/extractor/applepodcasts.py | 2 +- yt_dlp/extractor/appletrailers.py | 31 +- yt_dlp/extractor/archiveorg.py | 105 ++-- yt_dlp/extractor/arcpublishing.py | 11 +- yt_dlp/extractor/ard.py | 6 +- yt_dlp/extractor/arkena.py | 6 +- yt_dlp/extractor/arnes.py | 12 +- yt_dlp/extractor/art19.py | 2 +- yt_dlp/extractor/arte.py | 24 +- yt_dlp/extractor/atresplayer.py | 23 +- yt_dlp/extractor/atscaleconf.py | 10 +- yt_dlp/extractor/atvat.py | 16 +- yt_dlp/extractor/audimedia.py | 4 +- yt_dlp/extractor/audioboom.py | 4 +- yt_dlp/extractor/audiodraft.py | 13 +- yt_dlp/extractor/audiomack.py | 27 +- yt_dlp/extractor/audius.py | 46 +- yt_dlp/extractor/awaan.py | 27 +- yt_dlp/extractor/aws.py | 24 +- yt_dlp/extractor/azmedien.py | 6 +- yt_dlp/extractor/baidu.py | 7 +- yt_dlp/extractor/banbye.py | 9 +- yt_dlp/extractor/bandcamp.py | 31 +- yt_dlp/extractor/bannedvideo.py | 12 +- yt_dlp/extractor/bbc.py | 110 ++-- yt_dlp/extractor/beatport.py | 7 +- yt_dlp/extractor/beeg.py | 8 +- yt_dlp/extractor/behindkink.py | 2 +- yt_dlp/extractor/bellmedia.py | 2 +- yt_dlp/extractor/berufetv.py | 4 +- yt_dlp/extractor/bet.py | 8 +- yt_dlp/extractor/bfmtv.py | 4 +- yt_dlp/extractor/bigflix.py | 14 +- yt_dlp/extractor/bigo.py | 2 +- yt_dlp/extractor/bild.py | 4 +- yt_dlp/extractor/bilibili.py | 494 ++++++++++------ yt_dlp/extractor/bitchute.py | 30 +- yt_dlp/extractor/blackboardcollaborate.py | 2 +- yt_dlp/extractor/bleacherreport.py | 12 +- yt_dlp/extractor/blerp.py | 25 +- yt_dlp/extractor/blogger.py | 4 +- yt_dlp/extractor/bloomberg.py | 2 +- yt_dlp/extractor/bokecc.py | 15 +- yt_dlp/extractor/bongacams.py | 9 +- yt_dlp/extractor/bostonglobe.py | 3 +- yt_dlp/extractor/box.py | 6 +- yt_dlp/extractor/boxcast.py | 10 +- yt_dlp/extractor/br.py | 8 +- yt_dlp/extractor/brainpop.py | 14 +- yt_dlp/extractor/bravotv.py | 2 +- yt_dlp/extractor/breitbart.py | 4 +- yt_dlp/extractor/brightcove.py | 84 ++-
yt_dlp/extractor/bundesliga.py | 10 +- yt_dlp/extractor/businessinsider.py | 4 +- yt_dlp/extractor/buzzfeed.py | 6 +- yt_dlp/extractor/byutv.py | 2 +- yt_dlp/extractor/c56.py | 4 +- yt_dlp/extractor/callin.py | 16 +- yt_dlp/extractor/caltrans.py | 2 +- yt_dlp/extractor/cam4.py | 4 +- yt_dlp/extractor/camdemy.py | 33 +- yt_dlp/extractor/camfm.py | 4 +- yt_dlp/extractor/cammodels.py | 8 +- yt_dlp/extractor/camtasia.py | 6 +- yt_dlp/extractor/canalalpha.py | 8 +- yt_dlp/extractor/canalc2.py | 2 +- yt_dlp/extractor/canalplus.py | 5 +- yt_dlp/extractor/caracoltv.py | 4 +- yt_dlp/extractor/cartoonnetwork.py | 2 +- yt_dlp/extractor/cbc.py | 57 +- yt_dlp/extractor/cbs.py | 6 +- yt_dlp/extractor/cbsnews.py | 1 - yt_dlp/extractor/ccc.py | 6 +- yt_dlp/extractor/ccma.py | 6 +- yt_dlp/extractor/cctv.py | 7 +- yt_dlp/extractor/cda.py | 29 +- yt_dlp/extractor/cellebrite.py | 4 +- yt_dlp/extractor/ceskatelevize.py | 24 +- yt_dlp/extractor/cgtn.py | 10 +- yt_dlp/extractor/chaturbate.py | 6 +- yt_dlp/extractor/cinemax.py | 2 +- yt_dlp/extractor/cinetecamilano.py | 8 +- yt_dlp/extractor/cineverse.py | 10 +- yt_dlp/extractor/ciscolive.py | 4 +- yt_dlp/extractor/ciscowebex.py | 4 +- yt_dlp/extractor/cjsw.py | 2 +- yt_dlp/extractor/clippit.py | 4 +- yt_dlp/extractor/cliprs.py | 2 +- yt_dlp/extractor/closertotruth.py | 10 +- yt_dlp/extractor/cloudflarestream.py | 19 +- yt_dlp/extractor/cloudycdn.py | 25 +- yt_dlp/extractor/clubic.py | 4 +- yt_dlp/extractor/clyp.py | 6 +- yt_dlp/extractor/cmt.py | 4 +- yt_dlp/extractor/cnn.py | 8 +- yt_dlp/extractor/common.py | 148 ++--- yt_dlp/extractor/commonmistakes.py | 6 +- yt_dlp/extractor/commonprotocols.py | 2 +- yt_dlp/extractor/condenast.py | 33 +- yt_dlp/extractor/contv.py | 2 +- yt_dlp/extractor/corus.py | 12 +- yt_dlp/extractor/coub.py | 8 +- yt_dlp/extractor/cozytv.py | 10 +- yt_dlp/extractor/cpac.py | 24 +- yt_dlp/extractor/cracked.py | 4 +- yt_dlp/extractor/crackle.py | 14 +- yt_dlp/extractor/craftsy.py | 2 +- yt_dlp/extractor/crooksandliars.py | 4 +- yt_dlp/extractor/crowdbunker.py | 28 +- yt_dlp/extractor/crtvg.py | 4 +- yt_dlp/extractor/crunchyroll.py | 6 +- yt_dlp/extractor/cspan.py | 26 +- yt_dlp/extractor/ctsnews.py | 4 +- yt_dlp/extractor/ctv.py | 4 +- yt_dlp/extractor/ctvnews.py | 6 +- yt_dlp/extractor/cultureunplugged.py | 8 +- yt_dlp/extractor/curiositystream.py | 9 +- yt_dlp/extractor/cwtv.py | 4 +- yt_dlp/extractor/cybrary.py | 20 +- yt_dlp/extractor/dailymail.py | 10 +- yt_dlp/extractor/dailymotion.py | 16 +- yt_dlp/extractor/dailywire.py | 6 +- yt_dlp/extractor/damtomo.py | 9 +- yt_dlp/extractor/daum.py | 28 +- yt_dlp/extractor/dbtv.py | 2 +- yt_dlp/extractor/dctp.py | 11 +- yt_dlp/extractor/deezer.py | 6 +- yt_dlp/extractor/democracynow.py | 8 +- yt_dlp/extractor/detik.py | 20 +- yt_dlp/extractor/deuxm.py | 16 +- yt_dlp/extractor/dfb.py | 4 +- yt_dlp/extractor/digitalconcerthall.py | 67 ++- yt_dlp/extractor/digiteka.py | 2 +- yt_dlp/extractor/discovery.py | 10 +- yt_dlp/extractor/discoverygo.py | 5 +- yt_dlp/extractor/disney.py | 8 +- yt_dlp/extractor/dispeak.py | 10 +- yt_dlp/extractor/dlf.py | 36 +- yt_dlp/extractor/dlive.py | 8 +- yt_dlp/extractor/douyutv.py | 10 +- yt_dlp/extractor/dplay.py | 16 +- yt_dlp/extractor/drbonanza.py | 2 +- yt_dlp/extractor/dreisat.py | 4 +- yt_dlp/extractor/drooble.py | 6 +- yt_dlp/extractor/dropbox.py | 14 +- yt_dlp/extractor/dropout.py | 34 +- yt_dlp/extractor/drtuber.py | 10 +- yt_dlp/extractor/drtv.py | 18 +- yt_dlp/extractor/dtube.py | 6 +- yt_dlp/extractor/duboku.py | 29 +- yt_dlp/extractor/dumpert.py | 4 +- 
yt_dlp/extractor/dvtv.py | 14 +- yt_dlp/extractor/dw.py | 15 +- yt_dlp/extractor/eagleplatform.py | 20 +- yt_dlp/extractor/ebaumsworld.py | 2 +- yt_dlp/extractor/ebay.py | 4 +- yt_dlp/extractor/egghead.py | 11 +- yt_dlp/extractor/eighttracks.py | 49 +- yt_dlp/extractor/eitb.py | 18 +- yt_dlp/extractor/elpais.py | 4 +- yt_dlp/extractor/eltrecetv.py | 4 +- yt_dlp/extractor/epicon.py | 29 +- yt_dlp/extractor/epoch.py | 10 +- yt_dlp/extractor/eporner.py | 13 +- yt_dlp/extractor/erocast.py | 2 +- yt_dlp/extractor/eroprofile.py | 6 +- yt_dlp/extractor/err.py | 2 +- yt_dlp/extractor/ertgr.py | 17 +- yt_dlp/extractor/espn.py | 41 +- yt_dlp/extractor/ettutv.py | 2 +- yt_dlp/extractor/europa.py | 24 +- yt_dlp/extractor/europeantour.py | 8 +- yt_dlp/extractor/eurosport.py | 10 +- yt_dlp/extractor/euscreen.py | 18 +- yt_dlp/extractor/expressen.py | 2 +- yt_dlp/extractor/eyedotv.py | 12 +- yt_dlp/extractor/facebook.py | 52 +- yt_dlp/extractor/fancode.py | 35 +- yt_dlp/extractor/fc2.py | 18 +- yt_dlp/extractor/filmon.py | 11 +- yt_dlp/extractor/filmweb.py | 2 +- yt_dlp/extractor/firsttv.py | 27 +- yt_dlp/extractor/flickr.py | 14 +- yt_dlp/extractor/floatplane.py | 2 +- yt_dlp/extractor/folketinget.py | 5 +- yt_dlp/extractor/footyroom.py | 2 +- yt_dlp/extractor/fourtube.py | 41 +- yt_dlp/extractor/fox.py | 15 +- yt_dlp/extractor/fptplay.py | 2 +- yt_dlp/extractor/francetv.py | 54 +- yt_dlp/extractor/freesound.py | 2 +- yt_dlp/extractor/freetv.py | 10 +- yt_dlp/extractor/frontendmasters.py | 31 +- yt_dlp/extractor/fujitv.py | 8 +- yt_dlp/extractor/funimation.py | 32 +- yt_dlp/extractor/funker530.py | 6 +- yt_dlp/extractor/fuyintv.py | 2 +- yt_dlp/extractor/gab.py | 22 +- yt_dlp/extractor/gaia.py | 14 +- yt_dlp/extractor/gamejolt.py | 34 +- yt_dlp/extractor/gamespot.py | 5 +- yt_dlp/extractor/gamestar.py | 6 +- yt_dlp/extractor/gaskrank.py | 4 +- yt_dlp/extractor/gazeta.py | 4 +- yt_dlp/extractor/gbnews.py | 14 +- yt_dlp/extractor/gdcvault.py | 10 +- yt_dlp/extractor/gedidigital.py | 4 +- yt_dlp/extractor/generic.py | 155 ++--- yt_dlp/extractor/genericembeds.py | 10 +- yt_dlp/extractor/getcourseru.py | 22 +- yt_dlp/extractor/gettr.py | 14 +- yt_dlp/extractor/giantbomb.py | 2 +- yt_dlp/extractor/gigya.py | 2 +- yt_dlp/extractor/glide.py | 2 +- yt_dlp/extractor/globalplayer.py | 4 +- yt_dlp/extractor/globo.py | 29 +- yt_dlp/extractor/glomex.py | 6 +- yt_dlp/extractor/gmanetwork.py | 4 +- yt_dlp/extractor/go.py | 20 +- yt_dlp/extractor/godresource.py | 10 +- yt_dlp/extractor/godtube.py | 4 +- yt_dlp/extractor/gofile.py | 8 +- yt_dlp/extractor/golem.py | 16 +- yt_dlp/extractor/googledrive.py | 20 +- yt_dlp/extractor/googlepodcasts.py | 2 +- yt_dlp/extractor/goplay.py | 143 +++-- yt_dlp/extractor/gopro.py | 8 +- yt_dlp/extractor/goshgay.py | 9 +- yt_dlp/extractor/gotostage.py | 19 +- yt_dlp/extractor/gputechconf.py | 4 +- yt_dlp/extractor/graspop.py | 32 + yt_dlp/extractor/gronkh.py | 14 +- yt_dlp/extractor/groupon.py | 3 +- yt_dlp/extractor/harpodeon.py | 6 +- yt_dlp/extractor/hbo.py | 9 +- yt_dlp/extractor/heise.py | 6 +- yt_dlp/extractor/hidive.py | 6 +- yt_dlp/extractor/historicfilms.py | 2 +- yt_dlp/extractor/hitrecord.py | 11 +- yt_dlp/extractor/hketv.py | 6 +- yt_dlp/extractor/hollywoodreporter.py | 2 +- yt_dlp/extractor/holodex.py | 2 +- yt_dlp/extractor/hotnewhiphop.py | 9 +- yt_dlp/extractor/hotstar.py | 11 +- yt_dlp/extractor/hrfensehen.py | 12 +- yt_dlp/extractor/hrti.py | 20 +- yt_dlp/extractor/hse.py | 4 +- yt_dlp/extractor/huajiao.py | 2 +- yt_dlp/extractor/huffpost.py | 2 +- 
yt_dlp/extractor/hungama.py | 6 +- yt_dlp/extractor/huya.py | 13 +- yt_dlp/extractor/hypem.py | 6 +- yt_dlp/extractor/hypergryph.py | 4 +- yt_dlp/extractor/hytale.py | 4 +- yt_dlp/extractor/icareus.py | 12 +- yt_dlp/extractor/ichinanalive.py | 15 +- yt_dlp/extractor/ign.py | 21 +- yt_dlp/extractor/iheart.py | 2 +- yt_dlp/extractor/ilpost.py | 2 +- yt_dlp/extractor/iltalehti.py | 2 +- yt_dlp/extractor/imdb.py | 8 +- yt_dlp/extractor/imggaming.py | 2 +- yt_dlp/extractor/imgur.py | 21 +- yt_dlp/extractor/ina.py | 2 +- yt_dlp/extractor/inc.py | 2 +- yt_dlp/extractor/indavideo.py | 4 +- yt_dlp/extractor/infoq.py | 12 +- yt_dlp/extractor/instagram.py | 56 +- yt_dlp/extractor/internazionale.py | 4 +- yt_dlp/extractor/iprima.py | 4 +- yt_dlp/extractor/iqiyi.py | 77 +-- yt_dlp/extractor/islamchannel.py | 2 +- yt_dlp/extractor/israelnationalnews.py | 6 +- yt_dlp/extractor/itprotv.py | 14 +- yt_dlp/extractor/itv.py | 33 +- yt_dlp/extractor/ivi.py | 26 +- yt_dlp/extractor/ivideon.py | 16 +- yt_dlp/extractor/iwara.py | 9 +- yt_dlp/extractor/ixigua.py | 2 +- yt_dlp/extractor/izlesene.py | 18 +- yt_dlp/extractor/jamendo.py | 32 +- yt_dlp/extractor/japandiet.py | 8 +- yt_dlp/extractor/jiocinema.py | 37 +- yt_dlp/extractor/jiosaavn.py | 2 +- yt_dlp/extractor/joj.py | 15 +- yt_dlp/extractor/jove.py | 4 +- yt_dlp/extractor/jwplatform.py | 4 +- yt_dlp/extractor/kakao.py | 10 +- yt_dlp/extractor/kaltura.py | 73 ++- yt_dlp/extractor/kankanews.py | 2 +- yt_dlp/extractor/karaoketv.py | 4 +- yt_dlp/extractor/kelbyone.py | 2 +- yt_dlp/extractor/khanacademy.py | 141 +++-- yt_dlp/extractor/kicker.py | 6 +- yt_dlp/extractor/kinja.py | 18 +- yt_dlp/extractor/kommunetv.py | 10 +- yt_dlp/extractor/kompas.py | 2 +- yt_dlp/extractor/koo.py | 27 +- yt_dlp/extractor/kth.py | 7 +- yt_dlp/extractor/ku6.py | 10 +- yt_dlp/extractor/kuwo.py | 42 +- yt_dlp/extractor/la7.py | 2 +- yt_dlp/extractor/laracasts.py | 114 ++++ yt_dlp/extractor/laxarxames.py | 2 +- yt_dlp/extractor/lbry.py | 16 +- yt_dlp/extractor/lcp.py | 2 +- yt_dlp/extractor/lecture2go.py | 2 +- yt_dlp/extractor/lecturio.py | 6 +- yt_dlp/extractor/leeco.py | 41 +- yt_dlp/extractor/lego.py | 6 +- yt_dlp/extractor/lenta.py | 2 +- yt_dlp/extractor/libraryofcongress.py | 2 +- yt_dlp/extractor/libsyn.py | 6 +- yt_dlp/extractor/lifenews.py | 27 +- yt_dlp/extractor/likee.py | 2 +- yt_dlp/extractor/limelight.py | 16 +- yt_dlp/extractor/linkedin.py | 27 +- yt_dlp/extractor/liputan6.py | 6 +- yt_dlp/extractor/listennotes.py | 6 +- yt_dlp/extractor/litv.py | 6 +- yt_dlp/extractor/livejournal.py | 5 +- yt_dlp/extractor/livestream.py | 42 +- yt_dlp/extractor/livestreamfails.py | 4 +- yt_dlp/extractor/lnkgo.py | 27 +- yt_dlp/extractor/lovehomeporn.py | 6 +- yt_dlp/extractor/lrt.py | 8 +- yt_dlp/extractor/lsm.py | 12 +- yt_dlp/extractor/lumni.py | 2 +- yt_dlp/extractor/lynda.py | 54 +- yt_dlp/extractor/magentamusik.py | 2 +- yt_dlp/extractor/mailru.py | 15 +- yt_dlp/extractor/mainstreaming.py | 28 +- yt_dlp/extractor/mangomolo.py | 13 +- yt_dlp/extractor/manoto.py | 12 +- yt_dlp/extractor/manyvids.py | 8 +- yt_dlp/extractor/markiza.py | 9 +- yt_dlp/extractor/massengeschmacktv.py | 2 +- yt_dlp/extractor/masters.py | 2 +- yt_dlp/extractor/matchtv.py | 40 +- yt_dlp/extractor/mdr.py | 7 +- yt_dlp/extractor/medaltv.py | 13 +- yt_dlp/extractor/mediaite.py | 14 +- yt_dlp/extractor/mediaklikk.py | 39 +- yt_dlp/extractor/mediaset.py | 6 +- yt_dlp/extractor/mediasite.py | 96 +-- yt_dlp/extractor/mediaworksnz.py | 10 +- yt_dlp/extractor/meipai.py | 4 +- yt_dlp/extractor/melonvod.py | 4 +- 
yt_dlp/extractor/metacritic.py | 6 +- yt_dlp/extractor/mgtv.py | 6 +- yt_dlp/extractor/microsoftembed.py | 262 ++++++++- yt_dlp/extractor/microsoftstream.py | 6 +- yt_dlp/extractor/microsoftvirtualacademy.py | 189 ------ yt_dlp/extractor/mildom.py | 10 +- yt_dlp/extractor/minds.py | 11 +- yt_dlp/extractor/minoto.py | 2 +- yt_dlp/extractor/mirrativ.py | 6 +- yt_dlp/extractor/mit.py | 8 +- yt_dlp/extractor/mixch.py | 4 +- yt_dlp/extractor/mixcloud.py | 37 +- yt_dlp/extractor/mlb.py | 34 +- yt_dlp/extractor/mlssoccer.py | 69 +-- yt_dlp/extractor/mocha.py | 4 +- yt_dlp/extractor/mojvideo.py | 6 +- yt_dlp/extractor/monstercat.py | 6 +- yt_dlp/extractor/motherless.py | 6 +- yt_dlp/extractor/motorsport.py | 11 +- yt_dlp/extractor/moview.py | 6 +- yt_dlp/extractor/moviezine.py | 2 +- yt_dlp/extractor/movingimage.py | 2 +- yt_dlp/extractor/msn.py | 5 +- yt_dlp/extractor/mtv.py | 50 +- yt_dlp/extractor/muenchentv.py | 8 +- yt_dlp/extractor/murrtube.py | 6 +- yt_dlp/extractor/musescore.py | 12 +- yt_dlp/extractor/musicdex.py | 50 +- yt_dlp/extractor/mx3.py | 10 +- yt_dlp/extractor/mxplayer.py | 25 +- yt_dlp/extractor/myspace.py | 14 +- yt_dlp/extractor/myspass.py | 3 +- yt_dlp/extractor/mzaalo.py | 6 +- yt_dlp/extractor/n1.py | 6 +- yt_dlp/extractor/nate.py | 24 +- yt_dlp/extractor/nationalgeographic.py | 2 +- yt_dlp/extractor/naver.py | 12 +- yt_dlp/extractor/nba.py | 23 +- yt_dlp/extractor/nbc.py | 29 +- yt_dlp/extractor/ndr.py | 18 +- yt_dlp/extractor/ndtv.py | 26 +- yt_dlp/extractor/nekohacker.py | 32 +- yt_dlp/extractor/neteasemusic.py | 196 ++++--- yt_dlp/extractor/netverse.py | 14 +- yt_dlp/extractor/netzkino.py | 6 +- yt_dlp/extractor/newgrounds.py | 10 +- yt_dlp/extractor/newsy.py | 4 +- yt_dlp/extractor/nextmedia.py | 17 +- yt_dlp/extractor/nexx.py | 92 ++- yt_dlp/extractor/nfhsnetwork.py | 52 +- yt_dlp/extractor/nfl.py | 4 +- yt_dlp/extractor/nhk.py | 266 ++++++--- yt_dlp/extractor/nhl.py | 10 +- yt_dlp/extractor/nick.py | 18 +- yt_dlp/extractor/niconico.py | 74 +-- yt_dlp/extractor/niconicochannelplus.py | 4 +- yt_dlp/extractor/ninaprotocol.py | 10 +- yt_dlp/extractor/ninecninemedia.py | 10 +- yt_dlp/extractor/ninegag.py | 6 +- yt_dlp/extractor/ninenews.py | 4 +- yt_dlp/extractor/ninenow.py | 21 +- yt_dlp/extractor/nintendo.py | 2 +- yt_dlp/extractor/nitter.py | 18 +- yt_dlp/extractor/nobelprize.py | 2 +- yt_dlp/extractor/noice.py | 6 +- yt_dlp/extractor/nonktube.py | 2 +- yt_dlp/extractor/noodlemagazine.py | 6 +- yt_dlp/extractor/noovo.py | 7 +- yt_dlp/extractor/nosnl.py | 6 +- yt_dlp/extractor/nova.py | 8 +- yt_dlp/extractor/novaplay.py | 4 +- yt_dlp/extractor/nowness.py | 7 +- yt_dlp/extractor/noz.py | 9 +- yt_dlp/extractor/npo.py | 40 +- yt_dlp/extractor/npr.py | 4 +- yt_dlp/extractor/nrk.py | 72 ++- yt_dlp/extractor/ntvru.py | 6 +- yt_dlp/extractor/nubilesporn.py | 6 +- yt_dlp/extractor/nuevo.py | 2 +- yt_dlp/extractor/nuum.py | 6 +- yt_dlp/extractor/nuvid.py | 8 +- yt_dlp/extractor/nytimes.py | 2 +- yt_dlp/extractor/nzherald.py | 21 +- yt_dlp/extractor/nzonscreen.py | 2 +- yt_dlp/extractor/odkmedia.py | 4 +- yt_dlp/extractor/odnoklassniki.py | 19 +- yt_dlp/extractor/oftv.py | 8 +- yt_dlp/extractor/oktoberfesttv.py | 2 +- yt_dlp/extractor/olympics.py | 8 +- yt_dlp/extractor/on24.py | 6 +- yt_dlp/extractor/onefootball.py | 2 +- yt_dlp/extractor/onenewsnz.py | 10 +- yt_dlp/extractor/oneplace.py | 4 +- yt_dlp/extractor/onet.py | 6 +- yt_dlp/extractor/onionstudios.py | 3 +- yt_dlp/extractor/opencast.py | 2 +- yt_dlp/extractor/openload.py | 10 +- yt_dlp/extractor/openrec.py | 7 +- 
yt_dlp/extractor/ora.py | 8 +- yt_dlp/extractor/orf.py | 160 +++-- yt_dlp/extractor/outsidetv.py | 2 +- yt_dlp/extractor/packtpub.py | 9 +- yt_dlp/extractor/palcomp3.py | 11 +- yt_dlp/extractor/panopto.py | 66 +-- yt_dlp/extractor/paramountplus.py | 8 +- yt_dlp/extractor/parler.py | 2 +- yt_dlp/extractor/parlview.py | 7 +- yt_dlp/extractor/patreon.py | 43 +- yt_dlp/extractor/pbs.py | 37 +- yt_dlp/extractor/pearvideo.py | 4 +- yt_dlp/extractor/peertube.py | 71 ++- yt_dlp/extractor/peertv.py | 2 +- yt_dlp/extractor/peloton.py | 26 +- yt_dlp/extractor/performgroup.py | 11 +- yt_dlp/extractor/periscope.py | 12 +- yt_dlp/extractor/philharmoniedeparis.py | 7 +- yt_dlp/extractor/phoenix.py | 9 +- yt_dlp/extractor/photobucket.py | 6 +- yt_dlp/extractor/piapro.py | 15 +- yt_dlp/extractor/picarto.py | 14 +- yt_dlp/extractor/piksel.py | 8 +- yt_dlp/extractor/pinkbike.py | 8 +- yt_dlp/extractor/pinterest.py | 17 +- yt_dlp/extractor/pixivsketch.py | 4 +- yt_dlp/extractor/pladform.py | 12 +- yt_dlp/extractor/planetmarathi.py | 15 +- yt_dlp/extractor/platzi.py | 22 +- yt_dlp/extractor/playsuisse.py | 20 +- yt_dlp/extractor/playtvak.py | 24 +- yt_dlp/extractor/playwire.py | 2 +- yt_dlp/extractor/pluralsight.py | 69 +-- yt_dlp/extractor/plutotv.py | 25 +- yt_dlp/extractor/podbayfm.py | 41 +- yt_dlp/extractor/podchaser.py | 14 +- yt_dlp/extractor/podomatic.py | 11 +- yt_dlp/extractor/pokemon.py | 14 +- yt_dlp/extractor/pokergo.py | 29 +- yt_dlp/extractor/polsatgo.py | 6 +- yt_dlp/extractor/polskieradio.py | 17 +- yt_dlp/extractor/popcorntimes.py | 5 +- yt_dlp/extractor/popcorntv.py | 2 +- yt_dlp/extractor/pornbox.py | 12 +- yt_dlp/extractor/pornflip.py | 2 +- yt_dlp/extractor/pornhub.py | 66 +-- yt_dlp/extractor/pornotube.py | 11 +- yt_dlp/extractor/pornovoisines.py | 6 +- yt_dlp/extractor/pornoxo.py | 2 +- yt_dlp/extractor/pr0gramm.py | 4 +- yt_dlp/extractor/prankcast.py | 24 +- yt_dlp/extractor/premiershiprugby.py | 2 +- yt_dlp/extractor/presstv.py | 10 +- yt_dlp/extractor/projectveritas.py | 10 +- yt_dlp/extractor/prosiebensat1.py | 20 +- yt_dlp/extractor/prx.py | 68 +-- yt_dlp/extractor/puhutv.py | 41 +- yt_dlp/extractor/puls4.py | 3 +- yt_dlp/extractor/pyvideo.py | 7 +- yt_dlp/extractor/qingting.py | 4 +- yt_dlp/extractor/qqmusic.py | 559 +++++++++++------- yt_dlp/extractor/r7.py | 6 +- yt_dlp/extractor/radiko.py | 8 +- yt_dlp/extractor/radiocanada.py | 8 +- yt_dlp/extractor/radiocomercial.py | 14 +- yt_dlp/extractor/radiode.py | 4 +- yt_dlp/extractor/radiofrance.py | 4 +- yt_dlp/extractor/radiojavan.py | 2 +- yt_dlp/extractor/radiokapital.py | 4 +- yt_dlp/extractor/radiozet.py | 2 +- yt_dlp/extractor/radlive.py | 10 +- yt_dlp/extractor/rai.py | 20 +- yt_dlp/extractor/raywenderlich.py | 16 +- yt_dlp/extractor/rbgtum.py | 10 +- yt_dlp/extractor/rcs.py | 30 +- yt_dlp/extractor/rcti.py | 30 +- yt_dlp/extractor/rds.py | 7 +- yt_dlp/extractor/redbee.py | 30 +- yt_dlp/extractor/redbulltv.py | 19 +- yt_dlp/extractor/redge.py | 4 +- yt_dlp/extractor/redgifs.py | 30 +- yt_dlp/extractor/redtube.py | 4 +- yt_dlp/extractor/rentv.py | 7 +- yt_dlp/extractor/restudy.py | 4 +- yt_dlp/extractor/reuters.py | 8 +- yt_dlp/extractor/reverbnation.py | 6 +- yt_dlp/extractor/ridehome.py | 4 +- yt_dlp/extractor/rinsefm.py | 14 +- yt_dlp/extractor/rmcdecouverte.py | 8 +- yt_dlp/extractor/rockstargames.py | 2 +- yt_dlp/extractor/rokfin.py | 18 +- yt_dlp/extractor/roosterteeth.py | 6 +- yt_dlp/extractor/rottentomatoes.py | 4 +- yt_dlp/extractor/rozhlas.py | 16 +- yt_dlp/extractor/rte.py | 2 +- yt_dlp/extractor/rtl2.py | 4 +- 
yt_dlp/extractor/rtlnl.py | 26 +- yt_dlp/extractor/rtnews.py | 60 +- yt_dlp/extractor/rtp.py | 2 +- yt_dlp/extractor/rtrfm.py | 4 +- yt_dlp/extractor/rts.py | 11 +- yt_dlp/extractor/rtvcplay.py | 6 +- yt_dlp/extractor/rtve.py | 20 +- yt_dlp/extractor/rtvs.py | 8 +- yt_dlp/extractor/rtvslo.py | 164 +++--- yt_dlp/extractor/rule34video.py | 8 +- yt_dlp/extractor/rumble.py | 26 +- yt_dlp/extractor/rutube.py | 15 +- yt_dlp/extractor/rutv.py | 10 +- yt_dlp/extractor/ruutu.py | 16 +- yt_dlp/extractor/ruv.py | 8 +- yt_dlp/extractor/s4c.py | 6 +- yt_dlp/extractor/safari.py | 29 +- yt_dlp/extractor/saitosan.py | 8 +- yt_dlp/extractor/samplefocus.py | 8 +- yt_dlp/extractor/sapo.py | 2 +- yt_dlp/extractor/sbscokr.py | 4 +- yt_dlp/extractor/screencast.py | 15 +- yt_dlp/extractor/screencastomatic.py | 2 +- yt_dlp/extractor/scrippsnetworks.py | 12 +- yt_dlp/extractor/scrolller.py | 14 +- yt_dlp/extractor/scte.py | 6 +- yt_dlp/extractor/senategov.py | 19 +- yt_dlp/extractor/sendtonews.py | 6 +- yt_dlp/extractor/servus.py | 2 +- yt_dlp/extractor/sevenplus.py | 7 +- yt_dlp/extractor/sexu.py | 2 +- yt_dlp/extractor/seznamzpravy.py | 12 +- yt_dlp/extractor/shahid.py | 18 +- yt_dlp/extractor/shemaroome.py | 21 +- yt_dlp/extractor/showroomlive.py | 9 +- yt_dlp/extractor/sibnet.py | 4 +- yt_dlp/extractor/simplecast.py | 6 +- yt_dlp/extractor/sina.py | 7 +- yt_dlp/extractor/sixplay.py | 11 +- yt_dlp/extractor/skeb.py | 16 +- yt_dlp/extractor/sky.py | 2 +- yt_dlp/extractor/skyit.py | 10 +- yt_dlp/extractor/skylinewebcams.py | 2 +- yt_dlp/extractor/skynewsarabia.py | 11 +- yt_dlp/extractor/skynewsau.py | 12 +- yt_dlp/extractor/slideshare.py | 8 +- yt_dlp/extractor/slideslive.py | 2 +- yt_dlp/extractor/slutload.py | 12 +- yt_dlp/extractor/snotr.py | 2 +- yt_dlp/extractor/sohu.py | 48 +- yt_dlp/extractor/sonyliv.py | 6 +- yt_dlp/extractor/soundcloud.py | 106 ++-- yt_dlp/extractor/soundgasm.py | 4 +- yt_dlp/extractor/southpark.py | 4 +- yt_dlp/extractor/spankbang.py | 11 +- yt_dlp/extractor/spiegel.py | 4 +- yt_dlp/extractor/sport5.py | 6 +- yt_dlp/extractor/sportdeutschland.py | 12 +- yt_dlp/extractor/spotify.py | 6 +- yt_dlp/extractor/spreaker.py | 23 +- yt_dlp/extractor/springboardplatform.py | 5 +- yt_dlp/extractor/sproutvideo.py | 198 +++++++ yt_dlp/extractor/srgssr.py | 15 +- yt_dlp/extractor/srmediathek.py | 2 +- yt_dlp/extractor/stageplus.py | 2 +- yt_dlp/extractor/stanfordoc.py | 18 +- yt_dlp/extractor/startrek.py | 4 +- yt_dlp/extractor/startv.py | 31 +- yt_dlp/extractor/steam.py | 24 +- yt_dlp/extractor/stitcher.py | 5 +- yt_dlp/extractor/storyfire.py | 8 +- yt_dlp/extractor/streamable.py | 10 +- yt_dlp/extractor/streamcz.py | 14 +- yt_dlp/extractor/streetvoice.py | 8 +- yt_dlp/extractor/stretchinternet.py | 2 +- yt_dlp/extractor/stripchat.py | 2 +- yt_dlp/extractor/stv.py | 9 +- yt_dlp/extractor/substack.py | 8 +- yt_dlp/extractor/sunporno.py | 4 +- yt_dlp/extractor/sverigesradio.py | 2 +- yt_dlp/extractor/svt.py | 43 +- yt_dlp/extractor/swearnet.py | 8 +- yt_dlp/extractor/syfy.py | 4 +- yt_dlp/extractor/syvdk.py | 4 +- yt_dlp/extractor/tagesschau.py | 4 +- yt_dlp/extractor/taptap.py | 24 +- yt_dlp/extractor/tbs.py | 19 +- yt_dlp/extractor/tbsjp.py | 6 +- yt_dlp/extractor/teachable.py | 35 +- yt_dlp/extractor/teachertube.py | 12 +- yt_dlp/extractor/ted.py | 22 +- yt_dlp/extractor/tele13.py | 2 +- yt_dlp/extractor/telecaribe.py | 2 +- yt_dlp/extractor/telecinco.py | 2 +- yt_dlp/extractor/telegraaf.py | 8 +- yt_dlp/extractor/telegram.py | 2 +- yt_dlp/extractor/telemb.py | 6 +- yt_dlp/extractor/telemundo.py | 
4 +- yt_dlp/extractor/telequebec.py | 7 +- yt_dlp/extractor/teletask.py | 8 +- yt_dlp/extractor/telewebion.py | 2 +- yt_dlp/extractor/tempo.py | 18 +- yt_dlp/extractor/tencent.py | 2 +- yt_dlp/extractor/tennistv.py | 14 +- yt_dlp/extractor/tenplay.py | 6 +- yt_dlp/extractor/testurl.py | 2 +- yt_dlp/extractor/tf1.py | 4 +- yt_dlp/extractor/tfo.py | 4 +- yt_dlp/extractor/theguardian.py | 31 +- yt_dlp/extractor/theholetv.py | 6 +- yt_dlp/extractor/theintercept.py | 7 +- yt_dlp/extractor/theplatform.py | 32 +- yt_dlp/extractor/thestar.py | 2 +- yt_dlp/extractor/theweatherchannel.py | 8 +- yt_dlp/extractor/thisamericanlife.py | 4 +- yt_dlp/extractor/thisvid.py | 4 +- yt_dlp/extractor/threeqsdn.py | 6 +- yt_dlp/extractor/threespeak.py | 28 +- yt_dlp/extractor/tiktok.py | 89 +-- yt_dlp/extractor/tmz.py | 6 +- yt_dlp/extractor/tnaflix.py | 13 +- yt_dlp/extractor/toggle.py | 24 +- yt_dlp/extractor/tonline.py | 4 +- yt_dlp/extractor/toongoggles.py | 2 +- yt_dlp/extractor/toutv.py | 2 +- yt_dlp/extractor/toypics.py | 8 +- yt_dlp/extractor/traileraddict.py | 4 +- yt_dlp/extractor/trovo.py | 4 +- yt_dlp/extractor/trtcocuk.py | 6 +- yt_dlp/extractor/trtworld.py | 10 +- yt_dlp/extractor/trueid.py | 6 +- yt_dlp/extractor/trutv.py | 2 +- yt_dlp/extractor/tube8.py | 10 +- yt_dlp/extractor/tubetugraz.py | 79 +-- yt_dlp/extractor/tubitv.py | 160 +++-- yt_dlp/extractor/tumblr.py | 14 +- yt_dlp/extractor/tunein.py | 4 +- yt_dlp/extractor/turner.py | 9 +- yt_dlp/extractor/tv2.py | 18 +- yt_dlp/extractor/tv24ua.py | 6 +- yt_dlp/extractor/tv2dk.py | 2 +- yt_dlp/extractor/tv2hu.py | 19 +- yt_dlp/extractor/tv4.py | 2 +- yt_dlp/extractor/tv5unis.py | 12 +- yt_dlp/extractor/tvanouvelles.py | 4 +- yt_dlp/extractor/tvc.py | 2 +- yt_dlp/extractor/tver.py | 2 +- yt_dlp/extractor/tvigle.py | 8 +- yt_dlp/extractor/tviplayer.py | 8 +- yt_dlp/extractor/tvn24.py | 4 +- yt_dlp/extractor/tvnoe.py | 4 +- yt_dlp/extractor/tvp.py | 13 +- yt_dlp/extractor/tvplay.py | 14 +- yt_dlp/extractor/tvplayer.py | 7 +- yt_dlp/extractor/tweakers.py | 4 +- yt_dlp/extractor/twentymin.py | 6 +- yt_dlp/extractor/twentythreevideo.py | 4 +- yt_dlp/extractor/twitcasting.py | 14 +- yt_dlp/extractor/twitch.py | 114 ++-- yt_dlp/extractor/twitter.py | 79 ++- yt_dlp/extractor/txxx.py | 38 +- yt_dlp/extractor/udemy.py | 44 +- yt_dlp/extractor/udn.py | 8 +- yt_dlp/extractor/uktvplay.py | 2 +- yt_dlp/extractor/umg.py | 4 +- yt_dlp/extractor/unistra.py | 8 +- yt_dlp/extractor/unity.py | 2 +- yt_dlp/extractor/uol.py | 16 +- yt_dlp/extractor/urort.py | 12 +- yt_dlp/extractor/urplay.py | 8 +- yt_dlp/extractor/usatoday.py | 7 +- yt_dlp/extractor/ustream.py | 33 +- yt_dlp/extractor/ustudio.py | 12 +- yt_dlp/extractor/utreon.py | 10 +- yt_dlp/extractor/veo.py | 6 +- yt_dlp/extractor/veoh.py | 16 +- yt_dlp/extractor/vesti.py | 4 +- yt_dlp/extractor/vevo.py | 43 +- yt_dlp/extractor/vgtv.py | 19 +- yt_dlp/extractor/vh1.py | 2 +- yt_dlp/extractor/vice.py | 16 +- yt_dlp/extractor/viddler.py | 6 +- yt_dlp/extractor/videa.py | 7 +- yt_dlp/extractor/videocampus_sachsen.py | 34 +- yt_dlp/extractor/videofyme.py | 2 +- yt_dlp/extractor/videoken.py | 2 +- yt_dlp/extractor/videomore.py | 13 +- yt_dlp/extractor/videopress.py | 6 +- yt_dlp/extractor/vidio.py | 22 +- yt_dlp/extractor/vidlii.py | 4 +- yt_dlp/extractor/vidly.py | 2 +- yt_dlp/extractor/viewlift.py | 29 +- yt_dlp/extractor/viidea.py | 23 +- yt_dlp/extractor/viki.py | 28 +- yt_dlp/extractor/vimeo.py | 86 +-- yt_dlp/extractor/vine.py | 15 +- yt_dlp/extractor/viously.py | 2 +- yt_dlp/extractor/viqeo.py | 2 +- 
 yt_dlp/extractor/viu.py | 39 +- yt_dlp/extractor/vk.py | 10 +- yt_dlp/extractor/vodplatform.py | 2 +- yt_dlp/extractor/voicy.py | 19 +- yt_dlp/extractor/volejtv.py | 4 +- yt_dlp/extractor/voxmedia.py | 9 +- yt_dlp/extractor/vrt.py | 22 +- yt_dlp/extractor/vtm.py | 4 +- yt_dlp/extractor/vuclip.py | 10 +- yt_dlp/extractor/vvvvid.py | 22 +- yt_dlp/extractor/walla.py | 4 +- yt_dlp/extractor/washingtonpost.py | 6 +- yt_dlp/extractor/wat.py | 5 +- yt_dlp/extractor/wdr.py | 29 +- yt_dlp/extractor/webcamerapl.py | 4 +- yt_dlp/extractor/webcaster.py | 2 +- yt_dlp/extractor/webofstories.py | 18 +- yt_dlp/extractor/weibo.py | 11 +- yt_dlp/extractor/wevidi.py | 12 +- yt_dlp/extractor/whowatch.py | 21 +- yt_dlp/extractor/wikimedia.py | 4 +- yt_dlp/extractor/wimtv.py | 26 +- yt_dlp/extractor/wistia.py | 24 +- yt_dlp/extractor/wordpress.py | 14 +- yt_dlp/extractor/worldstarhiphop.py | 4 +- yt_dlp/extractor/wppilot.py | 4 +- yt_dlp/extractor/wsj.py | 7 +- yt_dlp/extractor/wwe.py | 7 +- yt_dlp/extractor/wykop.py | 2 +- yt_dlp/extractor/xanimu.py | 19 +- yt_dlp/extractor/xboxclips.py | 4 +- yt_dlp/extractor/xhamster.py | 27 +- yt_dlp/extractor/xiaohongshu.py | 4 +- yt_dlp/extractor/ximalaya.py | 30 +- yt_dlp/extractor/xinpianchang.py | 4 +- yt_dlp/extractor/xminus.py | 2 +- yt_dlp/extractor/xnxx.py | 2 +- yt_dlp/extractor/xstream.py | 5 +- yt_dlp/extractor/xvideos.py | 48 +- yt_dlp/extractor/xxxymovies.py | 2 +- yt_dlp/extractor/yahoo.py | 22 +- yt_dlp/extractor/yandexdisk.py | 8 +- yt_dlp/extractor/yandexmusic.py | 73 ++- yt_dlp/extractor/yandexvideo.py | 12 +- yt_dlp/extractor/yapfiles.py | 6 +- yt_dlp/extractor/yappy.py | 12 +- yt_dlp/extractor/yle_areena.py | 10 +- yt_dlp/extractor/youjizz.py | 2 +- yt_dlp/extractor/youku.py | 4 +- yt_dlp/extractor/younow.py | 45 +- yt_dlp/extractor/youporn.py | 6 +- yt_dlp/extractor/youtube.py | 403 ++++++------- yt_dlp/extractor/zaiko.py | 2 +- yt_dlp/extractor/zapiks.py | 4 +- yt_dlp/extractor/zattoo.py | 49 +- yt_dlp/extractor/zdf.py | 31 +- yt_dlp/extractor/zee5.py | 33 +- yt_dlp/extractor/zeenews.py | 6 +- yt_dlp/extractor/zenporn.py | 8 +- yt_dlp/extractor/zetland.py | 4 +- yt_dlp/extractor/zhihu.py | 2 +- yt_dlp/extractor/zingmp3.py | 12 +- yt_dlp/extractor/zoom.py | 10 +- yt_dlp/extractor/zype.py | 6 +-
 843 files changed, 8292 insertions(+), 7337 deletions(-)
 create mode 100644 yt_dlp/extractor/graspop.py
 create mode 100644 yt_dlp/extractor/laracasts.py
 delete mode 100644 yt_dlp/extractor/microsoftvirtualacademy.py
 create mode 100644 yt_dlp/extractor/sproutvideo.py

(limited to 'yt_dlp/extractor')

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index e9cd38a..7f6507d 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -76,6 +76,7 @@ from .aenetworks import (
 )
 from .aeonco import AeonCoIE
 from .afreecatv import (
+    AfreecaTVCatchStoryIE,
     AfreecaTVIE,
     AfreecaTVLiveIE,
     AfreecaTVUserIE,
@@ -779,6 +780,7 @@ from .gopro import GoProIE
 from .goshgay import GoshgayIE
 from .gotostage import GoToStageIE
 from .gputechconf import GPUTechConfIE
+from .graspop import GraspopIE
 from .gronkh import (
     GronkhFeedIE,
     GronkhIE,
@@ -969,6 +971,10 @@ from .la7 import (
     LA7PodcastEpisodeIE,
     LA7PodcastIE,
 )
+from .laracasts import (
+    LaracastsIE,
+    LaracastsPlaylistIE,
+)
 from .lastfm import (
     LastFMIE,
     LastFMPlaylistIE,
@@ -1113,12 +1119,15 @@ from .meipai import MeipaiIE
 from .melonvod import MelonVODIE
 from .metacritic import MetacriticIE
 from .mgtv import MGTVIE
-from .microsoftembed import MicrosoftEmbedIE
-from .microsoftstream import MicrosoftStreamIE
-from .microsoftvirtualacademy import (
-    MicrosoftVirtualAcademyCourseIE,
-    MicrosoftVirtualAcademyIE,
+from .microsoftembed import (
+    MicrosoftBuildIE,
+    MicrosoftEmbedIE,
+    MicrosoftLearnEpisodeIE,
+    MicrosoftLearnPlaylistIE,
+    MicrosoftLearnSessionIE,
+    MicrosoftMediusIE,
 )
+from .microsoftstream import MicrosoftStreamIE
 from .mildom import (
     MildomClipIE,
     MildomIE,
@@ -1603,6 +1612,7 @@ from .qqmusic import (
     QQMusicPlaylistIE,
     QQMusicSingerIE,
     QQMusicToplistIE,
+    QQMusicVideoIE,
 )
 from .r7 import (
     R7IE,
@@ -1755,7 +1765,10 @@ from .rtve import (
     RTVETelevisionIE,
 )
 from .rtvs import RTVSIE
-from .rtvslo import RTVSLOIE
+from .rtvslo import (
+    RTVSLOIE,
+    RTVSLOShowIE,
+)
 from .rudovideo import RudoVideoIE
 from .rule34video import Rule34VideoIE
 from .rumble import (
@@ -1925,6 +1938,10 @@ from .spreaker import (
 )
 from .springboardplatform import SpringboardPlatformIE
 from .sprout import SproutIE
+from .sproutvideo import (
+    SproutVideoIE,
+    VidsIoIE,
+)
 from .srgssr import (
     SRGSSRIE,
     SRGSSRPlayIE,
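The _extractors.py hunks above are what make the new extractors (GraspopIE, LaracastsIE, QQMusicVideoIE, RTVSLOShowIE, SproutVideoIE, VidsIoIE) reachable: yt-dlp dispatches a URL by asking each class imported here whether it matches. A minimal sketch of that dispatch pattern (a simplified model, not yt-dlp's actual machinery; the _VALID_URL patterns below are illustrative, not the shipped ones):

    import re

    class InfoExtractor:
        _VALID_URL = None

        @classmethod
        def suitable(cls, url):
            # each extractor declares a _VALID_URL regex; first match wins
            return re.match(cls._VALID_URL, url) is not None

    class GraspopIE(InfoExtractor):
        _VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)'  # illustrative

    class LaracastsIE(InfoExtractor):
        _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/[\w-]+/episodes/(?P<id>\d+)'  # illustrative

    _EXTRACTORS = [GraspopIE, LaracastsIE]

    def resolve(url):
        return next((ie for ie in _EXTRACTORS if ie.suitable(url)), None)

    print(resolve('https://laracasts.com/series/foo/episodes/1'))  # -> LaracastsIE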
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index 2c0d296..7518ba6 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -4,7 +4,6 @@ import re
 import time
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     dict_get,
@@ -67,7 +66,7 @@ class ABCIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'WWI Centenary',
             'description': 'md5:c2379ec0ca84072e86b446e536954546',
-        }
+        },
     }, {
         'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
         'info_dict': {
@@ -75,7 +74,7 @@ class ABCIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
             'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
-        }
+        },
     }, {
         'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
         'info_dict': {
@@ -86,7 +85,7 @@ class ABCIE(InfoExtractor):
             'upload_date': '20200813',
             'uploader': 'Behind the News',
             'uploader_id': 'behindthenews',
-        }
+        },
     }, {
         'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
         'info_dict': {
@@ -95,7 +94,7 @@ class ABCIE(InfoExtractor):
             'ext': 'mp4',
             'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
             'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
-        }
+        },
     }]
 
     def _real_extract(self, url):
@@ -126,7 +125,7 @@ class ABCIE(InfoExtractor):
         if mobj is None:
             expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
             if expired:
-                raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
+                raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
             raise ExtractorError('Unable to extract video urls')
 
         urls_info = self._parse_json(
@@ -164,7 +163,7 @@ class ABCIE(InfoExtractor):
                 'height': height,
                 'tbr': bitrate,
                 'filesize': int_or_none(url_info.get('filesize')),
-                'format_id': format_id
+                'format_id': format_id,
             })
 
         return {
@@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor):
         stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
 
         house_number = video_params.get('episodeHouseNumber') or video_id
-        path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
-            int(time.time()), house_number)
+        path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
         sig = hmac.new(
             b'android.content.res.Resources',
-            path.encode('utf-8'), hashlib.sha256).hexdigest()
+            path.encode(), hashlib.sha256).hexdigest()
         token = self._download_webpage(
-            'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
+            f'http://iview.abc.net.au{path}&sig={sig}', video_id)
 
         def tokenize_url(url, token):
             return update_url_query(url, {
@@ -303,7 +301,7 @@
 
         for sd in ('1080', '720', 'sd', 'sd-low'):
             sd_url = try_get(
-                stream, lambda x: x['streams']['hls'][sd], compat_str)
+                stream, lambda x: x['streams']['hls'][sd], str)
             if not sd_url:
                 continue
             formats = self._extract_m3u8_formats(
@@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
             'description': 'md5:93119346c24a7c322d446d8eece430ff',
             'series': 'Upper Middle Bogan',
             'season': 'Series 1',
-            'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
+            'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
         },
         'playlist_count': 8,
     }, {
@@ -386,7 +384,7 @@
             'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
             'series': '7.30 Mark Humphries Satire',
             'season': 'Episodes',
-            'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
+            'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
         },
         'playlist_count': 15,
     }]
@@ -398,7 +396,7 @@
             r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
             webpage, 'initial state')
         video_data = self._parse_json(
-            unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
+            unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
         video_data = video_data['route']['pageData']['_embedded']
 
         highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
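The ABCIViewIE hunk keeps the token scheme intact while converting it to f-strings: the client HMAC-SHA256-signs the /auth/hls/sign path with a fixed key and appends the hex digest as sig. The signing step in isolation (the house number below is invented; the key and path format are taken from the diff):

    import hashlib
    import hmac
    import time

    house_number = 'ZW3101A025S00'  # hypothetical episode house number
    path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
    sig = hmac.new(
        b'android.content.res.Resources',  # static key used by the extractor
        path.encode(), hashlib.sha256).hexdigest()
    print(f'http://iview.abc.net.au{path}&sig={sig}')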
diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py
index a57295b..7215500 100644
--- a/yt_dlp/extractor/abcnews.py
+++ b/yt_dlp/extractor/abcnews.py
@@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
         display_id = mobj.group('display_id')
         video_id = mobj.group('id')
         info_dict = self._extract_feed_info(
-            'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
+            f'http://abcnews.go.com/video/itemfeed?id={video_id}')
         info_dict.update({
             'id': video_id,
             'display_id': display_id,
diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py
index 6dca19d..ea5882b 100644
--- a/yt_dlp/extractor/abcotvs.py
+++ b/yt_dlp/extractor/abcotvs.py
@@ -1,5 +1,4 @@
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     dict_get,
     int_or_none,
@@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor):
         data = self._download_json(
             'https://api.abcotvs.com/v2/content', display_id, query={
                 'id': video_id,
-                'key': 'otv.web.%s.story' % station,
+                'key': f'otv.web.{station}.story',
                 'station': station,
             })['data']
         video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
-        video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
+        video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
         title = video.get('title') or video['linkText']
 
         formats = []
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index b8c79b9..293a6c4 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -66,8 +66,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
             query={'t': media_token},
             data=json.dumps({
                 'kv': 'a',
-                'lt': ticket
-            }).encode('utf-8'),
+                'lt': ticket,
+            }).encode(),
             headers={
                 'Content-Type': 'application/json',
             })
@@ -77,7 +77,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
 
         h = hmac.new(
             binascii.unhexlify(self.HKEY),
-            (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
+            (license_response['cid'] + self.ie._DEVICE_ID).encode(),
             digestmod=hashlib.sha256)
         enckey = bytes_to_intlist(h.digest())
 
@@ -103,11 +103,11 @@ class AbemaTVBaseIE(InfoExtractor):
 
     @classmethod
     def _generate_aks(cls, deviceid):
-        deviceid = deviceid.encode('utf-8')
+        deviceid = deviceid.encode()
         # add 1 hour and then drop minute and secs
         ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
         time_struct = time.gmtime(ts_1hour)
-        ts_1hour_str = str(ts_1hour).encode('utf-8')
+        ts_1hour_str = str(ts_1hour).encode()
 
         tmp = None
 
@@ -119,7 +119,7 @@ class AbemaTVBaseIE(InfoExtractor):
 
         def mix_tmp(count):
             nonlocal tmp
-            for i in range(count):
+            for _ in range(count):
                 mix_once(tmp)
 
         def mix_twist(nonce):
@@ -160,7 +160,7 @@ class AbemaTVBaseIE(InfoExtractor):
             data=json.dumps({
                 'deviceId': self._DEVICE_ID,
                 'applicationKeySecret': aks,
-            }).encode('utf-8'),
+            }).encode(),
             headers={
                 'Content-Type': 'application/json',
             })
@@ -180,7 +180,7 @@ class AbemaTVBaseIE(InfoExtractor):
                 'osLang': 'ja_JP',
                 'osTimezone': 'Asia/Tokyo',
                 'appId': 'tv.abema',
-                'appVersion': '3.27.1'
+                'appVersion': '3.27.1',
             }, headers={
                 'Authorization': f'bearer {self._get_device_token()}',
             })['token']
@@ -202,8 +202,8 @@ class AbemaTVBaseIE(InfoExtractor):
             f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
             data=json.dumps({
                 method: username,
-                'password': password
-            }).encode('utf-8'), headers={
+                'password': password,
+            }).encode(), headers={
                 'Authorization': f'bearer {self._get_device_token()}',
                 'Origin': 'https://abema.tv',
                 'Referer': 'https://abema.tv/',
@@ -344,7 +344,7 @@ class AbemaTVIE(AbemaTVBaseIE):
         description = self._html_search_regex(
             (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
              r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',
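Most of the abematv.py churn is dropping the redundant 'utf-8' argument, since str.encode() already defaults to UTF-8. The one functional detail worth keeping in view is the @@ -77,7 hunk: the license key is not taken from the server response directly but derived by HMAC-SHA256 over the response's cid concatenated with the client's device ID. A sketch of that derivation (HKEY and both IDs below are placeholders, not the real constants):

    import binascii
    import hashlib
    import hmac

    HKEY = '000102030405060708090a0b0c0d0e0f'  # placeholder, not the real key
    cid, device_id = 'example-cid', 'example-device-id'

    h = hmac.new(
        binascii.unhexlify(HKEY),
        (cid + device_id).encode(),  # .encode() == .encode('utf-8')
        digestmod=hashlib.sha256)
    enckey = h.digest()  # 32 bytes handed to the AES layer as the decryption key
    print(enckey.hex())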
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
-                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
+                f'<{tag}>(.+?)</{tag}>', xml_str, tag)
 
         def is_expired(token, date_ele):
             token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
@@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
         form_page, urlh = form_page_res
         post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
         if not re.match(r'https?://', post_url):
-            post_url = compat_urlparse.urljoin(urlh.url, post_url)
+            post_url = urllib.parse.urljoin(urlh.url, post_url)
         form_data = self._hidden_inputs(form_page)
         form_data.update(data)
         return self._download_webpage_handle(
@@ -1414,13 +1414,13 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
                 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
                 redirect_url = self._search_regex(
                     r'(?i)

diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py
-            r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
+            rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
             webpage, 'player URL')
         return self.url_result(player_url, HistoryPlayerIE.ie_key())
diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py
index 390eae3..22d0266 100644
--- a/yt_dlp/extractor/aeonco.py
+++ b/yt_dlp/extractor/aeonco.py
@@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
             'uploader': 'Semiconductor',
             'uploader_id': 'semiconductor',
             'uploader_url': 'https://vimeo.com/semiconductor',
-            'duration': 348
-        }
+            'duration': 348,
+        },
     }, {
         'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
         'md5': '03582d795382e49f2fd0b427b55de409',
@@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
             'uploader': 'Aeon Video',
             'uploader_id': 'aeonvideo',
             'uploader_url': 'https://vimeo.com/aeonvideo',
-            'duration': 1344
-        }
+            'duration': 1344,
+        },
     }, {
         'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
         'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index 3e5738f..f51b5a6 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -55,7 +55,7 @@ class AfreecaTVBaseIE(InfoExtractor):
         if result != 1:
             error = _ERRORS.get(result, 'You have failed to log in.')
             raise ExtractorError(
-                'Unable to login: %s said: %s' % (self.IE_NAME, error),
+                f'Unable to login: {self.IE_NAME} said: {error}',
                 expected=True)
 
@@ -72,7 +72,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
                     )\?.*?\bnTitleNo=|
                     vod\.afreecatv\.com/(PLAYER/STATION|player)/
                 )
-                (?P<id>\d+)
+                (?P<id>\d+)/?(?:$|[?#&])
             '''
     _TESTS = [{
         'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
@@ -189,7 +189,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
             headers={'Referer': url}, data=urlencode_postdata({
                 'nTitleNo': video_id,
                 'nApiLevel': 10,
-            }))['data']
+            }), impersonate=True)['data']
 
         error_code = traverse_obj(data, ('code', {int}))
         if error_code == -6221:
@@ -227,7 +227,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
                 **traverse_obj(file_element, {
                     'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
                     'timestamp': ('file_start', {unified_timestamp}),
-                })
+                }),
             })
 
         if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
@@ -253,6 +253,43 @@ class AfreecaTVIE(AfreecaTVBaseIE):
         return self.playlist_result(entries, video_id, multi_video=True, **common_info)
 
 
+class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
+    IE_NAME = 'afreecatv:catchstory'
+    IE_DESC = 'afreecatv.com catch story'
+    _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
+    _TESTS = [{
+        'url': 'https://vod.afreecatv.com/player/103247/catchstory',
+        'info_dict': {
+            'id': '103247',
+        },
+        'playlist_count': 2,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        data = self._download_json(
+            'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url},
+            query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True)
+
+        return self.playlist_result(self._entries(data), video_id)
+
+    @staticmethod
+    def _entries(data):
+        # 'files' is always a list with 1 element
+        yield from traverse_obj(data, (
+            'data', lambda _, v: v['story_type'] == 'catch',
+            'catch_list', lambda _, v: v['files'][0]['file'], {
+                'id': ('files', 0, 'file_info_key', {str}),
+                'url': ('files', 0, 'file', {url_or_none}),
+                'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
+                'title': ('title', {str}),
+                'uploader': ('writer_nick', {str}),
+                'uploader_id': ('writer_id', {str}),
+                'thumbnail': ('thumb', {url_or_none}),
+                'timestamp': ('write_timestamp', {int_or_none}),
+            }))
+
+
 class AfreecaTVLiveIE(AfreecaTVBaseIE):
     IE_NAME = 'afreecatv:live'
     IE_DESC = 'afreecatv.com livestreams'
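The new AfreecaTVCatchStoryIE flattens its whole response handling into one declarative traverse_obj call. Unrolled into plain Python, the traversal in _entries() behaves roughly like this (a behavioural sketch only; sample_response is invented to match the keys the spec expects):

    sample_response = {
        'data': [{
            'story_type': 'catch',
            'catch_list': [{
                'title': 'Example catch',
                'writer_nick': 'someone',
                'files': [{'file': 'https://example.com/v.m3u8',
                           'file_info_key': '103247-1', 'duration': 61000}],
            }],
        }],
    }

    def entries(data):
        for story in data.get('data') or []:
            if story.get('story_type') != 'catch':
                continue
            for catch in story.get('catch_list') or []:
                files = catch.get('files') or [{}]
                if not files[0].get('file'):  # same guard as the lambda in the spec
                    continue
                yield {
                    'id': files[0].get('file_info_key'),
                    'url': files[0]['file'],
                    'duration': (files[0].get('duration') or 0) // 1000 or None,  # scale=1000
                    'title': catch.get('title'),
                    'uploader': catch.get('writer_nick'),
                }

    print(list(entries(sample_response)))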
diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py
index abb2d3f..9835584 100644
--- a/yt_dlp/extractor/agora.py
+++ b/yt_dlp/extractor/agora.py
@@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor):
         for ext in ('aac', 'mp3'):
             url_data = self._download_json(
                 f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
-                media_id, 'Downloading podcast %s URL' % ext)
+                media_id, f'Downloading podcast {ext} URL')
             # prevents inserting the mp3 (default) multiple times
             if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
                 formats.append({
@@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor):
     }
 
     @staticmethod
-    def _create_url(id):
-        return f'https://audycje.tokfm.pl/audycja/{id}'
+    def _create_url(video_id):
+        return f'https://audycje.tokfm.pl/audycja/{video_id}'
 
     def _real_extract(self, url):
         audition_id = self._match_id(url)
diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py
index 6cc63cd..cee660d 100644
--- a/yt_dlp/extractor/airtv.py
+++ b/yt_dlp/extractor/airtv.py
@@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
             'view_count': int,
             'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
             'timestamp': 1664792603,
-        }
+        },
     }, {
         # with youtube_id
         'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
@@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
             'channel': 'Newsflare',
             'duration': 37,
             'upload_date': '20180511',
-        }
+        },
     }]
 
     def _get_formats_and_subtitle(self, json_data, video_id):
diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py
index 89a6450..5179b72 100644
--- a/yt_dlp/extractor/aitube.py
+++ b/yt_dlp/extractor/aitube.py
@@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
             'timestamp': 1667370519,
             'title': 'Ангел хранитель 1 серия',
             'channel_follower_count': int,
-        }
+        },
     }, {
         # embed url
         'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py
index 2e83f2e..e8f8618 100644
--- a/yt_dlp/extractor/aliexpress.py
+++ b/yt_dlp/extractor/aliexpress.py
@@ -1,5 +1,4 @@
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     float_or_none,
     try_get,
@@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor):
             'title': title,
             'thumbnail': data.get('coverUrl'),
             'uploader': try_get(
-                data, lambda x: x['followBar']['name'], compat_str),
+                data, lambda x: x['followBar']['name'], str),
             'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
             'formats': formats,
         }
diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py
index 124bab0..9715b49 100644
--- a/yt_dlp/extractor/aljazeera.py
+++ b/yt_dlp/extractor/aljazeera.py
@@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
             'timestamp': 1636219149,
             'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
             'upload_date': '20211106',
-        }
+        },
     }, {
         'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
         'info_dict': {
@@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
     BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
 
     def _real_extract(self, url):
-        base, post_type, id = self._match_valid_url(url).groups()
+        base, post_type, display_id = self._match_valid_url(url).groups()
         wp = {
             'balkans.aljazeera.net': 'ajb',
             'chinese.aljazeera.net': 'chinese',
@@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
             'news': 'news',
         }[post_type.split('/')[0]]
         video = self._download_json(
-            f'https://{base}/graphql', id, query={
+            f'https://{base}/graphql', display_id, query={
                 'wp-site': wp,
                 'operationName': 'ArchipelagoSingleArticleQuery',
                 'variables': json.dumps({
-                    'name': id,
+                    'name': display_id,
                     'postType': post_type,
                 }),
             }, headers={
@@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
         embed = 'default'
 
         if video_id is None:
-            webpage = self._download_webpage(url, id)
+            webpage = self._download_webpage(url, display_id)
 
             account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
                                                                      group=(1, 2, 3, 4), default=(None, None, None, None))
@@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
                 return {
                     '_type': 'url_transparent',
                     'url': url,
-                    'ie_key': 'Generic'
+                    'ie_key': 'Generic',
                 }
 
         return {
             '_type': 'url_transparent',
             'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
-            'ie_key': 'BrightcoveNew'
+            'ie_key': 'BrightcoveNew',
         }
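AlJazeeraIE issues its GraphQL query as a plain GET, with the operation name and JSON-encoded variables carried in the query string. Rebuilding that request URL outside the extractor is a one-liner with urlencode (the site, article name and post type here are taken from the diff's own test data):

    import json
    import urllib.parse

    params = {
        'wp-site': 'ajb',
        'operationName': 'ArchipelagoSingleArticleQuery',
        'variables': json.dumps({
            'name': 'djokovic-usao-u-finale-mastersa-u-parizu',
            'postType': 'videos',
        }),
    }
    print('https://balkans.aljazeera.net/graphql?' + urllib.parse.urlencode(params))
    # the extractor additionally sends 'wp-site' as a request header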
diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py
index 2d342cf..e0859d4 100644
--- a/yt_dlp/extractor/allocine.py
+++ b/yt_dlp/extractor/allocine.py
@@ -1,5 +1,4 @@
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     qualities,
@@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor):
             duration = int_or_none(video.get('duration'))
             view_count = int_or_none(video.get('view_count'))
             timestamp = unified_timestamp(try_get(
-                video, lambda x: x['added_at']['date'], compat_str))
+                video, lambda x: x['added_at']['date'], str))
         else:
             video_id = display_id
             media_data = self._download_json(
-                'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
+                f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
             title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
             for key, value in media_data['video'].items():
                 if not key.endswith('Path'):
diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py
index 49df4bf..5ea1c30 100644
--- a/yt_dlp/extractor/allstar.py
+++ b/yt_dlp/extractor/allstar.py
@@ -33,27 +33,27 @@ _QUERIES = {
         video: getClip(clipIdentifier: $id) {
             %s %s
         }
-    }''' % (_FIELDS, _EXTRA_FIELDS),
+    }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031
     'montage': '''query ($id: String!) {
         video: getMontage(clipIdentifier: $id) {
             %s
         }
-    }''' % _FIELDS,
+    }''' % _FIELDS,  # noqa: UP031
     'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
         videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
             data { %s %s }
         }
-    }''' % (_FIELDS, _EXTRA_FIELDS),
+    }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031
     'Montages': '''query ($page: Int!, $user: String!) {
         videos: montages(search: createdDate, page: $page, user: $user) {
             data { %s }
         }
-    }''' % _FIELDS,
+    }''' % _FIELDS,  # noqa: UP031
     'Mobile Clips': '''query ($page: Int!, $user: String!) {
         videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
             data { %s %s }
         }
-    }''' % (_FIELDS, _EXTRA_FIELDS),
+    }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031
 }
@@ -121,7 +121,7 @@ class AllstarIE(AllstarBaseIE):
             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
             'upload_date': '20230425',
             'view_count': int,
-        }
+        },
     }, {
         'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
         'info_dict': {
@@ -139,7 +139,7 @@ class AllstarIE(AllstarBaseIE):
             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
             'upload_date': '20230702',
             'view_count': int,
-        }
+        },
     }, {
         'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
         'info_dict': {
@@ -155,7 +155,7 @@ class AllstarIE(AllstarBaseIE):
             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
             'upload_date': '20230418',
             'view_count': int,
-        }
+        },
     }, {
         'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
         'info_dict': {
@@ -171,7 +171,7 @@ class AllstarIE(AllstarBaseIE):
             'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
             'upload_date': '20230703',
             'view_count': int,
-        }
+        },
     }]
 
     def _real_extract(self, url):
@@ -191,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE):
             'id': '62b8bdfc9021052f7905882d-clips',
             'title': 'cherokee - Clips',
         },
-        'playlist_mincount': 15
+        'playlist_mincount': 15,
     }, {
         'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
         'info_dict': {
             'id': '62b8bdfc9021052f7905882d-clips-730',
             'title': 'cherokee - Clips - 730',
         },
-        'playlist_mincount': 15
+        'playlist_mincount': 15,
     }, {
         'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
         'info_dict': {
             'id': '62b8bdfc9021052f7905882d-montages',
             'title': 'cherokee - Montages',
         },
-        'playlist_mincount': 4
+        'playlist_mincount': 4,
     }, {
         'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
         'info_dict': {
             'id': '62b8bdfc9021052f7905882d-mobile',
             'title': 'cherokee - Mobile Clips',
        },
-        'playlist_mincount': 1
+        'playlist_mincount': 1,
     }]
 
     _PAGE_SIZE = 10
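allstar.py composes five GraphQL documents from two shared field-fragment strings via %-interpolation, and the new `# noqa: UP031` comments tell ruff's printf-formatting rule not to rewrite them as f-strings, where GraphQL's literal braces would all need doubling. The pattern in miniature:

    _FIELDS = 'id title url'            # stand-ins for the real fragment strings
    _EXTRA_FIELDS = 'views createdDate'

    _QUERIES = {
        'clip': '''query ($id: String!) {
            video: getClip(clipIdentifier: $id) { %s %s }
        }''' % (_FIELDS, _EXTRA_FIELDS),  # noqa: UP031
        'montage': '''query ($id: String!) {
            video: getMontage(clipIdentifier: $id) { %s }
        }''' % _FIELDS,  # noqa: UP031
    }
    print(_QUERIES['clip'])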
diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py
index f927965..7b74d55 100644
--- a/yt_dlp/extractor/alphaporno.py
+++ b/yt_dlp/extractor/alphaporno.py
@@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
             'tbr': 1145,
             'categories': list,
             'age_limit': 18,
-        }
+        },
     }
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py
index ea3332e..c315e4f 100644
--- a/yt_dlp/extractor/alsace20tv.py
+++ b/yt_dlp/extractor/alsace20tv.py
@@ -12,7 +12,7 @@ from ..utils import (
 class Alsace20TVBaseIE(InfoExtractor):
     def _extract_video(self, video_id, url=None):
         info = self._download_json(
-            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
+            f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
            video_id) or {}
         title = info.get('titre')
@@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
             else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
 
         webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
-        thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
+        thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
         upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
-        upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
+        upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
         return {
             'id': video_id,
             'title': title,
diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py
index 6878918..bfbf6b6 100644
--- a/yt_dlp/extractor/altcensored.py
+++ b/yt_dlp/extractor/altcensored.py
@@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor):
             'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
             'view_count': int,
             'categories': ['News & Politics'],
-        }
+        },
     }]
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py
index cb2b989..ce03a42 100644
--- a/yt_dlp/extractor/alura.py
+++ b/yt_dlp/extractor/alura.py
@@ -1,7 +1,7 @@
 import re
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
     clean_html,
@@ -21,7 +21,7 @@ class AluraIE(InfoExtractor):
         'info_dict': {
             'id': '60095',
             'ext': 'mp4',
-            'title': 'Referências, ref-set e alter'
+            'title': 'Referências, ref-set e alter',
         },
         'skip': 'Requires alura account credentials'},
     {
@@ -30,7 +30,7 @@ class AluraIE(InfoExtractor):
         'only_matching': True},
     {
         'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
-        'only_matching': True}
+        'only_matching': True},
     ]
 
     def _real_extract(self, url):
@@ -62,7 +62,7 @@ class AluraIE(InfoExtractor):
         return {
             'id': video_id,
             'title': video_title,
-            "formats": formats
+            'formats': formats,
         }
 
     def _perform_login(self, username, password):
@@ -91,7 +91,7 @@ class AluraIE(InfoExtractor):
             'post url', default=self._LOGIN_URL, group='url')
 
         if not post_url.startswith('http'):
-            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+            post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
 
         response = self._download_webpage(
             post_url, None, 'Logging in',
@@ -103,7 +103,7 @@ class AluraIE(InfoExtractor):
                 r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
                 response, 'error message', default=None)
             if error:
-                raise ExtractorError('Unable to login: %s' % error, expected=True)
+                raise ExtractorError(f'Unable to login: {error}', expected=True)
             raise ExtractorError('Unable to log in')
@@ -119,7 +119,7 @@ class AluraCourseIE(AluraIE):  # XXX: Do not subclass from concrete IE
 
     @classmethod
     def suitable(cls, url):
-        return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
+        return False if AluraIE.suitable(url) else super().suitable(url)
 
     def _real_extract(self, url):
@@ -157,7 +157,7 @@ class AluraCourseIE(AluraIE):  # XXX: Do not subclass from concrete IE
                         'url': video_url,
                         'id_key': self.ie_key(),
                         'chapter': chapter,
-                        'chapter_number': chapter_number
+                        'chapter_number': chapter_number,
                     }
                     entries.append(entry)
         return self.playlist_result(entries, course_path, course_title)
diff --git a/yt_dlp/extractor/amadeustv.py b/yt_dlp/extractor/amadeustv.py
index 2f5ca91..f4ea04e 100644
--- a/yt_dlp/extractor/amadeustv.py
+++ b/yt_dlp/extractor/amadeustv.py
@@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor):
             'display_id': '65091a87ff85af59d9fc54c3',
             'view_count': int,
             'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
-        }
+        },
     }]
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py
index 509b21a..ed0f0cd 100644
--- a/yt_dlp/extractor/amara.py
+++ b/yt_dlp/extractor/amara.py
@@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
             'uploader': 'PBS NewsHour',
             'uploader_id': 'PBSNewsHour',
             'timestamp': 1549639570,
-        }
+        },
     }, {
         # Vimeo
         'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
@@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
             'timestamp': 1294763658,
             'upload_date': '20110111',
             'uploader': 'Sam Morrill',
-            'uploader_id': 'sammorrill'
-        }
+            'uploader_id': 'sammorrill',
+        },
     }, {
         # Direct Link
         'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
@@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
             'subtitles': dict,
             'upload_date': '20091007',
             'timestamp': 1254942511,
-        }
+        },
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         meta = self._download_json(
-            'https://amara.org/api/videos/%s/' % video_id,
+            f'https://amara.org/api/videos/{video_id}/',
             video_id, query={'format': 'json'})
         title = meta['title']
         video_url = meta['all_urls'][0]
diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py
index a03f983..d1b9166 100644
--- a/yt_dlp/extractor/amazon.py
+++ b/yt_dlp/extractor/amazon.py
@@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        id = self._match_id(url)
+        playlist_id = self._match_id(url)
 
         for retry in self.RetryManager():
-            webpage = self._download_webpage(url, id)
+            webpage = self._download_webpage(url, playlist_id)
             try:
                 data_json = self._search_json(
-                    r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
+                    r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
                     transform_source=js_to_json)
             except ExtractorError as e:
                 retry.error = e
@@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
             'height': int_or_none(video.get('videoHeight')),
             'width': int_or_none(video.get('videoWidth')),
         } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
-        return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
+        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
 
 
 class AmazonReviewsIE(InfoExtractor):
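The AmazonStoreIE hunk above leans on yt-dlp's RetryManager: assigning the caught exception to retry.error marks the attempt as failed and schedules another pass. A simplified standalone model of that control flow (this is not the real yt-dlp implementation, just the shape of it):

    class _Attempt:
        def __init__(self):
            self.error = None

    def retry_manager(attempts=3):
        for _ in range(attempts):
            attempt = _Attempt()
            yield attempt
            if attempt.error is None:
                return  # the loop body succeeded; stop iterating
        raise attempt.error  # retries exhausted; surface the last error

    for retry in retry_manager():
        try:
            data = {'videos': []}  # stand-in for the download + parse step
        except ValueError as e:
            retry.error = e  # mark failure; the manager yields a fresh attempt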
diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py
index 2c71c5e..0590a34 100644
--- a/yt_dlp/extractor/amazonminitv.py
+++ b/yt_dlp/extractor/amazonminitv.py
@@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor):
             asin, note=note, headers={
                 'Content-Type': 'application/json',
                 'currentpageurl': '/',
-                'currentplatform': 'dWeb'
+                'currentplatform': 'dWeb',
             }, data=json.dumps(data).encode() if data else None,
             query=None if data else {
                 'deviceType': 'A1WMMUXPCUJL4N',
diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py
index 10bd021..15a86e2 100644
--- a/yt_dlp/extractor/amcnetworks.py
+++ b/yt_dlp/extractor/amcnetworks.py
@@ -64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
         site, display_id = self._match_valid_url(url).groups()
         requestor_id = self._REQUESTOR_ID_MAP[site]
         page_data = self._download_json(
-            'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
-            % (requestor_id.lower(), display_id), display_id)['data']
+            f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
+            display_id)['data']
         properties = page_data.get('properties') or {}
         query = {
             'mbr': 'true',
@@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
         try:
             for v in page_data['children']:
                 if v.get('type') == 'video-player':
-                    releasePid = v['properties']['currentVideo']['meta']['releasePid']
-                    tp_path = 'M_UwQC/' + releasePid
+                    release_pid = v['properties']['currentVideo']['meta']['releasePid']
+                    tp_path = 'M_UwQC/' + release_pid
                     media_url = 'https://link.theplatform.com/s/' + tp_path
                     video_player_count += 1
         except KeyError:
             pass
         if video_player_count > 1:
             self.report_warning(
-                'The JSON data has %d video players. Only one will be extracted' % video_player_count)
+                f'The JSON data has {video_player_count} video players. Only one will be extracted')
 
         # Fall back to videoPid if releasePid not found.
         # TODO: Fall back to videoPid if releasePid manifest uses DRM.
@@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
         })
         ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
         if ns_keys:
-            ns = list(ns_keys)[0]
+            ns = next(iter(ns_keys))
             episode = theplatform_metadata.get(ns + '$episodeTitle') or None
             episode_number = int_or_none(
                 theplatform_metadata.get(ns + '$episode'))
diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py
index e889458..a6337e4 100644
--- a/yt_dlp/extractor/americastestkitchen.py
+++ b/yt_dlp/extractor/americastestkitchen.py
@@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
             resource_type = 'episodes'
 
         resource = self._download_json(
-            'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
+            f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
         video = resource['video'] if is_episode else resource
         episode = resource if is_episode else resource.get('episode') or {}
 
         return {
             '_type': 'url_transparent',
-            'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
+            'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
             'ie_key': 'Zype',
             'description': clean_html(video.get('description')),
             'timestamp': unified_timestamp(video.get('publishDate')),
@@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
         ]
 
         if season_number:
-            playlist_id = 'season_%d' % season_number
-            playlist_title = 'Season %d' % season_number
+            playlist_id = f'season_{season_number}'
+            playlist_title = f'Season {season_number}'
             facet_filters.append('search_season_list:' + playlist_title)
         else:
             playlist_id = show
             playlist_title = title
 
         season_search = self._download_json(
-            'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
+            f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
             playlist_id, headers={
                 'Origin': 'https://www.americastestkitchen.com',
                 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
                 'X-Algolia-Application-Id': 'Y1FNZXUI30',
             }, query={
                 'facetFilters': json.dumps(facet_filters),
-                'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
+                'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
                 'attributesToHighlight': '',
                 'hitsPerPage': 1000,
             })
@@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
                 'description': episode.get('description'),
                 'timestamp': unified_timestamp(episode.get('search_document_date')),
                 'season_number': season_number,
-                'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
+                'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
                 'ie_key': AmericasTestKitchenIE.ie_key(),
             }
 
diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py
index 6b2bf2d..adf4733 100644
--- a/yt_dlp/extractor/amp.py
+++ b/yt_dlp/extractor/amp.py
@@ -19,12 +19,12 @@ class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with
             'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
         item = feed.get('channel', {}).get('item')
         if not item:
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
+            raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
 
         video_id = item['guid']
 
         def get_media_node(name, default=None):
-            media_name = 'media-%s' % name
+            media_name = f'media-{name}'
             media_group = item.get('media-group') or item
             return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
 
diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py
index 5e78f37..652154a 100644
--- a/yt_dlp/extractor/anchorfm.py
+++ b/yt_dlp/extractor/anchorfm.py
@@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
             'release_date': '20230121',
             'release_timestamp': 1674285179,
             'episode_id': 'e1tpt3d',
-        }
+        },
     }, {
         # embed url
         'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
@@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
             'season': 'Season 2',
             'season_number': 2,
             'episode_id': 'e1shjqd',
-        }
+        },
     }]
 
     _WEBPAGE_TESTS = [{
@@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
             'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
             'uploader': 'Podcast Tempo',
             'channel': 'apakatatempo',
-        }
+        },
     }]
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py
index 9f5b9b5..6800fe3 100644
--- a/yt_dlp/extractor/angel.py
+++ b/yt_dlp/extractor/angel.py
@@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
             'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
             'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
             'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
-            'duration': 1359.0
-        }
+            'duration': 1359.0,
+        },
     }, {
         'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
         'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
@@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
             'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
             'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
             'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
-            'duration': 3276.0
-        }
+            'duration': 3276.0,
+        },
     }]
 
     def _real_extract(self, url):
@@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
             'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
             'formats': formats,
-            'subtitles': subtitles
+            'subtitles': subtitles,
         }
 
        # Angel uses cloudinary in the background and supports image transformations.
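Note the americastestkitchen.py hunk above switches the Zype URL to str.format() rather than an f-string: the replacement field indexes a dict with the same quote character as the enclosing string, and reusing the enclosing quote inside an f-string expression is a syntax error before Python 3.12, which yt-dlp still supports. For illustration (the payload is invented):

    video = {'zypeId': 'abc123'}  # stand-in payload

    # OK on all supported Python versions:
    url = 'https://player.zype.com/embed/{}.js'.format(video['zypeId'])

    # SyntaxError before Python 3.12 (quote reuse inside the expression):
    #   url = f'https://player.zype.com/embed/{video['zypeId']}.js'
    print(url)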
diff --git a/yt_dlp/extractor/antenna.py b/yt_dlp/extractor/antenna.py
index 2929d65..b1a0179 100644
--- a/yt_dlp/extractor/antenna.py
+++ b/yt_dlp/extractor/antenna.py
@@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
         info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
         embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
         if not embed_urls:
-            raise ExtractorError('no videos found for %s' % video_id, expected=True)
+            raise ExtractorError(f'no videos found for {video_id}', expected=True)
         return self.playlist_from_matches(
             embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
             video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py
index 0df5033..bf3d60b 100644
--- a/yt_dlp/extractor/anvato.py
+++ b/yt_dlp/extractor/anvato.py
@@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor):
         'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
         'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
         'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
-        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
+        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
     }
 
     def _generate_nfl_token(self, anvack, mcp_id):
@@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor):
                     token
                 }
             }
-}''' % (anvack, mcp_id),
+}''' % (anvack, mcp_id),  # noqa: UP031
             }).encode(), headers={
                 'Authorization': auth_token,
                 'Content-Type': 'application/json',
@@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor):
 
         return self._download_json(
             video_data_url, video_id, transform_source=strip_jsonp, query=query,
-            data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
+            data=json.dumps({'api': api}, separators=(',', ':')).encode())
 
     def _get_anvato_videos(self, access_key, video_id, token):
         video_data = self._get_video_json(access_key, video_id, token)
@@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor):
         for caption in video_data.get('captions', []):
             a_caption = {
                 'url': caption['url'],
-                'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
+                'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
             }
             subtitles.setdefault(caption['language'], []).append(a_caption)
         subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py
index 455f667..893dce7 100644
--- a/yt_dlp/extractor/aol.py
+++ b/yt_dlp/extractor/aol.py
@@ -30,7 +30,7 @@ class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
     }, {
         # video with vidible ID
         'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
@@ -46,7 +46,7 @@ class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
     }, {
         'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
         'only_matching': True,
@@ -83,10 +83,10 @@ class AolIE(YahooIE):  # XXX: Do not subclass from concrete IE
             return self._extract_yahoo_video(video_id, 'us')
 
         response = self._download_json(
-            'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
+            f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
             video_id)['response']
         if response['statusText'] != 'Ok':
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
+            raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
 
         video_data = response['data']
         formats = []
diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py
index 1ea0b1d..fed5970 100644
--- a/yt_dlp/extractor/apa.py
+++ b/yt_dlp/extractor/apa.py
@@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
         video_id, base_url = mobj.group('id', 'base_url')
 
         webpage = self._download_webpage(
-            '%s/player/%s' % (base_url, video_id), video_id)
+            f'{base_url}/player/{video_id}', video_id)
 
         jwplatform_id = self._search_regex(
             r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
 
         def extract(field, name=None):
             return self._search_regex(
-                r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
+                rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
                 webpage, name or field, default=None, group='value')
 
         title = extract('title') or video_id
diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py
index 49bbeab..bd301e9 100644
--- a/yt_dlp/extractor/applepodcasts.py
+++ b/yt_dlp/extractor/applepodcasts.py
@@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor):
             'duration': 6454,
             'series': 'The Tim Dillon Show',
             'thumbnail': 're:.+[.](png|jpe?g|webp)',
-        }
+        },
     }, {
         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
         'only_matching': True,
diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py
index 21103ae..0a600f6 100644
--- a/yt_dlp/extractor/appletrailers.py
+++ b/yt_dlp/extractor/appletrailers.py
@@ -1,8 +1,8 @@
 import json
 import re
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
 from ..utils import (
     int_or_none,
     parse_duration,
@@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
                     'uploader_id': 'wb',
                 },
             },
-        ]
+        ],
     }, {
         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
         'info_dict': {
@@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
         webpage = self._download_webpage(url, movie)
         film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
         film_data = self._download_json(
-            'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
+            f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
             film_id, fatal=False)
 
         if film_data:
@@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
                     if not src:
                         continue
                     formats.append({
-                        'format_id': '%s-%s' % (version, size),
+                        'format_id': f'{version}-{size}',
                         'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                         'width': int_or_none(size_data.get('width')),
                         'height': int_or_none(size_data.get('height')),
@@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
             page_data = film_data.get('page', {})
             return self.playlist_result(entries, film_id, page_data.get('movie_title'))
 
-        playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+        playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
 
         def fix_html(s):
             s = re.sub(r'(?s)<script>.*?</script>', '', s)
@@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor):
             # like: http://trailers.apple.com/trailers/wb/gravity/
 
             def _clean_json(m):
-                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+                return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))
             s = re.sub(self._JSON_RE, _clean_json, s)
-            s = '<html>%s</html>' % s
-            return s
+            return f'<html>{s}</html>'
 
         doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
 
         playlist = []
@@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor):
                     duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
 
             trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
-            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
+            settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
             settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
 
             formats = []
-            for format in settings['metadata']['sizes']:
+            for fmt in settings['metadata']['sizes']:
                 # The src is a file pointing to the real video file
-                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
+                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
                 formats.append({
                     'url': format_url,
-                    'format': format['type'],
-                    'width': int_or_none(format['width']),
-                    'height': int_or_none(format['height']),
+                    'format': fmt['type'],
+                    'width': int_or_none(fmt['width']),
+                    'height': int_or_none(fmt['height']),
                 })
 
             playlist.append({
@@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor):
             'title': 'Movie Studios',
         },
     }
-    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
     _TESTS = [{
         'url': 'http://trailers.apple.com/#section=justadded',
         'info_dict': {
@@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor):
     def _real_extract(self, url):
         section = self._match_id(url)
         section_data = self._download_json(
-            'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+            'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
             section)
         entries = [
             self.url_result('http://trailers.apple.com' + e['location'])
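AppleTrailersSectionIE above builds its _VALID_URL by joining the _SECTIONS keys into a regex alternation. A standalone sketch of that pattern (the dict is trimmed to two stand-in entries; the real slugs are regex-safe, otherwise each key would need re.escape):

    import re

    _SECTIONS = {'justadded': {}, 'exclusive': {}}  # trimmed stand-in
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format(
        '|'.join(_SECTIONS))  # iterating a dict yields its keys

    m = re.match(_VALID_URL, 'https://trailers.apple.com/#section=justadded')
    assert m and m.group('id') == 'justadded'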
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index 41f3a4f..f5a55ef 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -1,10 +1,11 @@
+from __future__ import annotations
+
 import json
 import re
 import urllib.parse
 
 from .common import InfoExtractor
 from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
-from ..compat import compat_urllib_parse_unquote
 from ..networking import HEADRequest
 from ..networking.exceptions import HTTPError
 from ..utils import (
@@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor):
             'title': 'Bells Of Rostov',
             'ext': 'mp3',
         },
-        'skip': 'restricted'
+        'skip': 'restricted',
     }, {
         'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
         'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
@@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor):
             'description': 'md5:012b2d668ae753be36896f343d12a236',
             'upload_date': '20190928',
         },
-        'skip': 'restricted'
+        'skip': 'restricted',
     }, {
         # Original formats are private
         'url': 'https://archive.org/details/irelandthemakingofarepublic',
@@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor):
                 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
                 'display_id': 'irelandthemakingofarepublicreel2.mov',
             },
-        }
-        ]
+        },
+        ],
     }]
 
     @staticmethod
@@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = urllib.parse.unquote_plus(self._match_id(url))
-        identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
+        identifier, _, entry_id = video_id.partition('/')
 
         # Archive.org metadata API doesn't clearly demarcate playlist entries
         # or subtitle tracks, so we get them from the embeddable player.
@@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor):
                 if track['kind'] != 'subtitles':
                     continue
                 entries[p['orig']][track['label']] = {
-                    'url': 'https://archive.org/' + track['file'].lstrip('/')
+                    'url': 'https://archive.org/' + track['file'].lstrip('/'),
                 }
 
         metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
@@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor):
                         'height': int_or_none(f.get('width')),
                         'filesize': int_or_none(f.get('size'))})
 
-                extension = (f['name'].rsplit('.', 1) + [None])[1]
+                _, has_ext, extension = f['name'].rpartition('.')
+                if not has_ext:
+                    extension = None
 
                 # We don't want to skip private formats if the user has access to them,
                 # however without access to an account with such privileges we can't implement/test this.
@@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor):
                         'filesize': int_or_none(f.get('size')),
                         'protocol': 'https',
                         'source_preference': 0 if f.get('source') == 'original' else -1,
-                        'format_note': f.get('source')
+                        'format_note': f.get('source'),
                     })
 
         for entry in entries.values():
@@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'uploader_url': 'https://www.youtube.com/user/Zeurel',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
-        }
+        },
     }, {
         # Internal link
         'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
@@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'uploader_url': 'https://www.youtube.com/user/1veritasium',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
-        }
+        },
     }, {
         # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
         # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
@@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'uploader_id': 'machinima',
             'uploader_url': 'https://www.youtube.com/user/machinima',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
-            'uploader': 'machinima'
-        }
+            'uploader': 'machinima',
+        },
     }, {
         # FLV video. Video file URL does not provide itag information
         'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
@@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'uploader': 'jawed',
-        }
+        },
     }, {
         'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
         'info_dict': {
@@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'uploader_url': 'https://www.youtube.com/user/itsmadeon',
             'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
-        }
+        },
    }, {
         # First capture is of dead video, second is the oldest from CDX response.
         'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
@@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'uploader': 'ETC News',
-        }
+        },
     }, {
         # First capture of dead video, capture date in link links to dead capture.
         'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
@@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'uploader': 'ETC News',
         },
         'expected_warnings': [
-            r'unable to download capture webpage \(it may not be archived\)'
-        ]
+            r'unable to download capture webpage \(it may not be archived\)',
+        ],
     }, {  # Very old YouTube page, has - YouTube in title.
         'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
         'info_dict': {
             'id': '-06-KB9XTzg',
             'ext': 'flv',
-            'title': 'New Coin Hack!! 100% Safe!!'
-        }
+            'title': 'New Coin Hack!! 100% Safe!!',
+        },
     }, {
         'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
         'info_dict': {
@@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'uploader': 'DankPods',
-        }
+        },
     }, {
         # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
         'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
@@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'uploader_id': 'PewDiePie',
             'uploader_url': 'https://www.youtube.com/user/PewDiePie',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
-        }
+        },
     }, {
         # ~June 2010 Capture. swfconfig
         'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
@@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
             'upload_date': '20090520',
-        }
+        },
     }, {
         # Jan 2011: watch-video-date/eow-date surrounded by whitespace
         'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
@@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'duration': 132,
             'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
-        }
+        },
     }, {
         # ~May 2009 swfArgs. ytcfg is spread out over various vars
         'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
@@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'duration': 754,
-        }
+        },
     }, {
         # ~June 2012. Upload date is in another lang so cannot extract.
         'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
@@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'uploader': 'BlackNerdComedy',
             'duration': 182,
             'thumbnail': r're:https?://.*\.(jpg|webp)',
-        }
+        },
     }, {
         # ~July 2013
         'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
@@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
             'upload_date': '20060428',
             'uploader': 'punkybird',
-        }
+        },
     }, {
         # April 2020: Player response in player config
         'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
@@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
             'uploader_url': 'https://www.youtube.com/user/GameGrumps',
-        }
+        },
     }, {
         # watch7-user-header with yt-user-info
         'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
@@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'thumbnail': r're:https?://.*\.(jpg|webp)',
             'upload_date': '20150503',
             'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
-        }
+        },
     }, {
         # April 2012
         'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
@@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'duration': 200,
             'upload_date': '20120407',
             'uploader_id': 'thecomputernerd01',
-        }
+        },
     }, {
         'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
-        'only_matching': True
+        'only_matching': True,
     }, {
         'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
-        'only_matching': True
+        'only_matching': True,
     }, {
         # Video not archived, only capture is unavailable video page
         'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
-        'only_matching': True
+        'only_matching': True,
     }, {
        # Encoded url
        'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
-        'only_matching': True
+        'only_matching': True,
    }, {
        'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
-        'only_matching': True
+        'only_matching': True,
    }, {
        'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer',
-        'only_matching': True
+        'only_matching': True,
    }, {
        'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
-        'only_matching': True
+        'only_matching': True,
    }, {
        'url': 'ytarchive:BaW_jenozKc:20050214000000',
-        'only_matching': True
+        'only_matching': True,
    }, {
        'url': 'ytarchive:BaW_jenozKc',
-        'only_matching': True
+        'only_matching': True,
    },
    ]
 
     _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
@@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
 
     _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com']  # thumbnails most likely archived on these servers
     _YT_ALL_THUMB_SERVERS = orderedSet(
-        _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
+        [*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
 
     _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
     _OLDEST_CAPTURE_DATE = 20050214000000
     _NEWEST_CAPTURE_DATE = 20500101000000
 
-    def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
+    def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
         # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
         query = {
             'url': url,
@@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'limit': 500,
             'filter': ['statuscode:200'] + (filters or []),
             'collapse': collapse or [],
-            **(query or {})
+            **(query or {}),
         }
         res = self._download_json(
             'https://web.archive.org/cdx/search/cdx', item_id,
             note or 'Downloading CDX API JSON', query=query, fatal=fatal)
         if isinstance(res, list) and len(res) >= 2:
             # format response to make it easier to use
-            return list(dict(zip(res[0], v)) for v in res[1:])
+            return [dict(zip(res[0], v)) for v in res[1:]]
         elif not isinstance(res, list) or len(res) != 0:
             self.report_warning('Error while parsing CDX API response' + bug_reports_message())
 
@@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
                 {
                     'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
                     'filesize': int_or_none(thumbnail_dict.get('length')),
-                    'preference': int_or_none(thumbnail_dict.get('length'))
+                    'preference': int_or_none(thumbnail_dict.get('length')),
                 } for thumbnail_dict in response)
             if not try_all:
                 break
@@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
         for retry in retry_manager:
             try:
                 urlh = self._request_webpage(
-                    HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
+                    HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
                     video_id, note='Fetching archived video file url', expected_status=True)
             except ExtractorError as e:
                 # HTTP Error 404 is expected if the video is not saved.
@@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor):
             info['thumbnails'] = self._extract_thumbnails(video_id)
 
         if urlh:
-            url = compat_urllib_parse_unquote(urlh.url)
+            url = urllib.parse.unquote(urlh.url)
             video_file_url_qs = parse_qs(url)
             # Attempt to recover any ext & format info from playback url & response headers
-            format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
+            fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
             itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
             if itag and itag in YoutubeIE._formats:
-                format.update(YoutubeIE._formats[itag])
-                format.update({'format_id': itag})
+                fmt.update(YoutubeIE._formats[itag])
+                fmt.update({'format_id': itag})
             else:
                 mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
                 ext = (mimetype2ext(mime)
                        or urlhandle_detect_ext(urlh)
                       or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
-                format.update({'ext': ext})
-            info['formats'] = [format]
+                fmt.update({'ext': ext})
+            info['formats'] = [fmt]
 
         if not info.get('duration'):
             info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
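In _call_cdx_api above, the CDX endpoint answers with a header row followed by data rows, and the extractor zips each data row against the header to get dicts; the hunk only swaps the generator-in-list() for a list comprehension. Standalone illustration with dummy data:

    res = [
        ['urlkey', 'timestamp', 'original'],  # header row from the CDX server
        ['com,youtube)/watch?v=x', '20120607174520', 'http://www.youtube.com/watch?v=x'],
    ]
    rows = [dict(zip(res[0], v)) for v in res[1:]]
    assert rows[0]['timestamp'] == '20120607174520'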
diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py
index febd3d2..8da9bc4 100644
--- a/yt_dlp/extractor/arcpublishing.py
+++ b/yt_dlp/extractor/arcpublishing.py
@@ -4,6 +4,7 @@ from .common import InfoExtractor
 from ..utils import (
     extract_attributes,
     int_or_none,
+    join_nonempty,
     parse_iso8601,
     try_get,
 )
@@ -11,7 +12,7 @@ from ..utils import (
 
 class ArcPublishingIE(InfoExtractor):
     _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
-    _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
+    _VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
     _TESTS = [{
         # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
         'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
@@ -74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor):
     def _extract_embed_urls(cls, url, webpage):
         entries = []
         # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
-        for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
+        for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
             powa = extract_attributes(powa_el) or {}
             org = powa.get('data-org')
             uuid = powa.get('data-uuid')
             if org and uuid:
-                entries.append('arcpublishing:%s:%s' % (org, uuid))
+                entries.append(f'arcpublishing:{org}:{uuid}')
         return entries
 
     def _real_extract(self, url):
@@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor):
             elif stream_type in ('ts', 'hls'):
                 m3u8_formats = self._extract_m3u8_formats(
                     s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
-                if all([f.get('acodec') == 'none' for f in m3u8_formats]):
+                if all(f.get('acodec') == 'none' for f in m3u8_formats):
                     continue
                 for f in m3u8_formats:
                     height = f.get('height')
@@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
             else:
                 vbr = int_or_none(s.get('bitrate'))
                 formats.append({
-                    'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
+                    'format_id': join_nonempty(stream_type, vbr),
                     'vbr': vbr,
                     'width': int_or_none(s.get('width')),
                     'height': int_or_none(s.get('height')),
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 3db59c5..6fd6413 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor):
                 formats.extend(self._extract_f4m_formats(
                     update_url_query(stream_url, {
                         'hdcore': '3.1.1',
-                        'plugin': 'aasp-3.1.1.69.124'
+                        'plugin': 'aasp-3.1.1.69.124',
                     }), video_id, f4m_id='hds', fatal=False))
             elif ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
@@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor):
                     f = {
                         'url': server,
                         'play_path': stream_url,
-                        'format_id': 'a%s-rtmp-%s' % (num, quality),
+                        'format_id': f'a{num}-rtmp-{quality}',
                     }
                 else:
                     f = {
                         'url': stream_url,
-                        'format_id': 'a%s-%s-%s' % (num, ext, quality)
+                        'format_id': f'a{num}-{ext}-{quality}',
                     }
                 m = re.search(
                     r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py
index de36ec8..aa6c5ca 100644
--- a/yt_dlp/extractor/arkena.py
+++ b/yt_dlp/extractor/arkena.py
@@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
             raise ExtractorError('Invalid URL', expected=True)
 
         media = self._download_json(
-            'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
+            f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
             video_id, query={
                 # https://video.qbrick.com/docs/api/examples/library-api.html
                 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
@@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
                     formats.extend(self._extract_f4m_formats(
                         href, video_id, f4m_id='hds', fatal=False))
                 elif mime_type == 'application/dash+xml':
-                    formats.extend(self._extract_f4m_formats(
-                        href, video_id, f4m_id='hds', fatal=False))
+                    formats.extend(self._extract_mpd_formats(
+                        href, video_id, mpd_id='dash', fatal=False))
                 elif mime_type == 'application/vnd.ms-sstr+xml':
                     formats.extend(self._extract_ism_formats(
                         href, video_id, ism_id='mss', fatal=False))
diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py
index 9a5524a..f196f61 100644
--- a/yt_dlp/extractor/arnes.py
+++ b/yt_dlp/extractor/arnes.py
@@ -1,8 +1,6 @@
+import urllib.parse
+
 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_urllib_parse_urlparse,
-)
 from ..utils import (
     float_or_none,
     format_field,
@@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor):
             'view_count': int,
             'tags': ['linearna_algebra'],
             'start_time': 10,
-        }
+        },
     }, {
         'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
         'only_matching': True,
@@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor):
             'duration': float_or_none(video.get('duration'), 1000),
             'view_count': int_or_none(video.get('views')),
             'tags': video.get('hashtags'),
-            'start_time': int_or_none(compat_parse_qs(
-                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
+            'start_time': int_or_none(urllib.parse.parse_qs(
+                urllib.parse.urlparse(url).query).get('t', [None])[0]),
         }
diff --git a/yt_dlp/extractor/art19.py b/yt_dlp/extractor/art19.py
index 271c505..deec7ad 100644
--- a/yt_dlp/extractor/art19.py
+++ b/yt_dlp/extractor/art19.py
@@ -153,7 +153,7 @@ class Art19IE(InfoExtractor):
                 'series_id': ('series_id', {str}),
                 'timestamp': ('created_at', {parse_iso8601}),
                 'release_timestamp': ('released_at', {parse_iso8601}),
-                'modified_timestamp': ('updated_at', {parse_iso8601})
+                'modified_timestamp': ('updated_at', {parse_iso8601}),
             })),
             **traverse_obj(rss_metadata, ('content', {
                 'title': ('episode_title', {str}),
diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index 46fe006..142d4b0 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -20,15 +20,15 @@ class ArteTVBaseIE(InfoExtractor):
 
 
 class ArteTVIE(ArteTVBaseIE):
-    _VALID_URL = r'''(?x)
+    _VALID_URL = rf'''(?x)
                     (?:https?://
                         (?:
-                            (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
-                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
+                            (?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos|
+                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES})
                         )
                     |arte://program)
-                        /(?P<id>\d{6}-\d{3}-[AF]|LIVE)
-                    ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
+                        /(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)
+                    '''
     _TESTS = [{
         'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
         'only_matching': True,
@@ -145,7 +145,7 @@ class ArteTVIE(ArteTVBaseIE):
         language_code = self._LANG_MAP.get(lang)
 
         config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
-            'x-validated-age': '18'
+            'x-validated-age': '18',
         })
 
         geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
@@ -247,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor):
             'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
             'upload_date': '20201116',
         },
-        'skip': 'No video available'
+        'skip': 'No video available',
     }, {
         'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
         'only_matching': True,
@@ -262,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor):
 
 
 class ArteTVPlaylistIE(ArteTVBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
+    _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})'
     _TESTS = [{
         'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
         'only_matching': True,
@@ -298,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
 
 
 class ArteTVCategoryIE(ArteTVBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
+    _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$'
     _TESTS = [{
         'url': 'https://www.arte.tv/en/videos/politics-and-society/',
         'info_dict': {
@@ -312,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
     @classmethod
     def suitable(cls, url):
         return (
-            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
+            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE))
             and super().suitable(url))
 
     def _real_extract(self, url):
@@ -321,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE):
 
         items = []
         for video in re.finditer(
-                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
+                rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)',
                 webpage):
             video = video.group('url')
             if video == url:
                 continue
-            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
+            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)):
                 items.append(video)
 
         title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
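The arte.py hunks above fold the language alternation into rf-strings; note that regex quantifier braces must then be doubled so they reach the regex engine as literal braces. Standalone check (the language list mirrors the hunk context but is treated as an assumption here):

    import re

    langs = 'fr|de|en|es|it|pl'  # assumed value of ArteTVBaseIE._ARTE_LANGUAGES
    valid_url = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{langs})/videos/(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)'

    m = re.match(valid_url, 'https://www.arte.tv/en/videos/088501-000-A/mexico/')
    assert m and m.group('id') == '088501-000-A'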
diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py
index 3a44e52..0fe95be 100644
--- a/yt_dlp/extractor/atresplayer.py
+++ b/yt_dlp/extractor/atresplayer.py
@@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor):
             'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
             'duration': 3413,
         },
-        'skip': 'This video is only available for registered users'
+        'skip': 'This video is only available for registered users',
     }, {
         'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
         'only_matching': True,
@@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
     ]
     _API_BASE = 'https://api.atresplayer.com/'
 
-    def _handle_error(self, e, code):
-        if isinstance(e.cause, HTTPError) and e.cause.status == code:
-            error = self._parse_json(e.cause.response.read(), None)
-            if error.get('error') == 'required_registered':
-                self.raise_login_required()
-            raise ExtractorError(error['error_description'], expected=True)
-        raise
-
     def _perform_login(self, username, password):
         self._request_webpage(
             self._API_BASE + 'login', None, 'Downloading login page')
@@ -49,13 +41,15 @@ class AtresPlayerIE(InfoExtractor):
             target_url = self._download_json(
                 'https://account.atresmedia.com/api/login', None,
                 'Logging in', headers={
-                    'Content-Type': 'application/x-www-form-urlencoded'
+                    'Content-Type': 'application/x-www-form-urlencoded',
                 }, data=urlencode_postdata({
                     'username': username,
                     'password': password,
                 }))['targetUrl']
         except ExtractorError as e:
-            self._handle_error(e, 400)
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
+                raise ExtractorError('Invalid username and/or password', expected=True)
+            raise
 
         self._request_webpage(target_url, None, 'Following Target URL')
 
@@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
             episode = self._download_json(
                 self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
         except ExtractorError as e:
-            self._handle_error(e, 403)
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
+                error = self._parse_json(e.cause.response.read(), None)
+                if error.get('error') == 'required_registered':
+                    self.raise_login_required()
+                raise ExtractorError(error['error_description'], expected=True)
+            raise
 
         title = episode['titulo']
 
diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py
index 3f7b1e9..b219eee 100644
--- a/yt_dlp/extractor/atscaleconf.py
+++ b/yt_dlp/extractor/atscaleconf.py
@@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor):
         'info_dict': {
             'id': 'data-scale-spring-2022',
             'title': 'Data @Scale Spring 2022',
-            'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+            'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
         },
     }, {
         'url': 'https://atscaleconference.com/events/video-scale-2021/',
@@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor):
         'info_dict': {
             'id': 'video-scale-2021',
             'title': 'Video @Scale 2021',
-            'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+            'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
         },
     }]
 
     def _real_extract(self, url):
-        id = self._match_id(url)
-        webpage = self._download_webpage(url, id)
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
 
         return self.playlist_from_matches(
             re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
-            ie='Generic', playlist_id=id,
+            ie='Generic', playlist_id=playlist_id,
             title=self._og_search_title(webpage), description=self._og_search_description(webpage))
diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py
index 20ee34c..37bb616 100644
--- a/yt_dlp/extractor/atvat.py
+++ b/yt_dlp/extractor/atvat.py
@@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor):
             'id': 'v-ce9cgn1e70n5-1',
             'ext': 'mp4',
             'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
-        }
+        },
     }, {
         'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
         'only_matching': True,
@@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor):
             video_id=video_id)
 
         video_title = json_data['views']['default']['page']['title']
-        contentResource = json_data['views']['default']['page']['contentResource']
-        content_id = contentResource[0]['id']
-        content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
-                       for id, content in enumerate(contentResource)]
+        content_resource = json_data['views']['default']['page']['contentResource']
+        content_id = content_resource[0]['id']
+        content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
+                       for id_, content in enumerate(content_resource)]
 
         time_of_request = dt.datetime.now()
         not_before = time_of_request - dt.timedelta(minutes=5)
@@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor):
         videos = self._download_json(
             'https://vas-v4.p7s1video.net/4.0/getsources',
             content_id, 'Downloading videos JSON', query={
-                'token': jwt_token.decode('utf-8')
+                'token': jwt_token.decode('utf-8'),
             })
 
-        video_id, videos_data = list(videos['data'].items())[0]
+        video_id, videos_data = next(iter(videos['data'].items()))
         error_msg = try_get(videos_data, lambda x: x['error']['title'])
         if error_msg == 'Geo check failed':
             self.raise_geo_restricted(error_msg)
         elif error_msg:
             raise ExtractorError(error_msg)
 
         entries = [
-            self._extract_video_info(url, contentResource[video['id']], video)
+            self._extract_video_info(url, content_resource[video['id']], video)
             for video in videos_data]
 
         return {
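The atvat.py context above backdates the token's validity window by five minutes before calling the getsources endpoint, presumably to tolerate clock skew between client and server. A rough standalone equivalent using PyJWT (yt-dlp itself uses its internal jwt_encode_hs256 helper; the key and claim set here are invented):

    import datetime as dt

    import jwt  # PyJWT; install with: pip install PyJWT

    now = dt.datetime.now(dt.timezone.utc)
    payload = {
        'nbf': now - dt.timedelta(minutes=5),  # not-before, backdated for skew
        'exp': now + dt.timedelta(minutes=5),  # keep the token short-lived
    }
    token = jwt.encode(payload, 'not-the-real-key', algorithm='HS256')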
diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py
index 35114e5..c5a9c7e 100644
--- a/yt_dlp/extractor/audimedia.py
+++ b/yt_dlp/extractor/audimedia.py
@@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor):
             'timestamp': 1448354940,
             'duration': 74022,
             'view_count': int,
-        }
+        },
     }, {
         'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
         'only_matching': True,
@@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor):
                 bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
                 if bitrate:
                     f.update({
-                        'format_id': 'http-%s' % bitrate,
+                        'format_id': f'http-{bitrate}',
                     })
                 formats.append(f)
 
diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py
index a23fcd2..751b74a 100644
--- a/yt_dlp/extractor/audioboom.py
+++ b/yt_dlp/extractor/audioboom.py
@@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor):
             'duration': 4000.99,
             'uploader': 'Sue Perkins: An hour or so with...',
             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
-        }
+        },
     }, {  # Direct mp3-file link
         'url': 'https://audioboom.com/posts/8128496.mp3',
         'md5': 'e329edf304d450def95c7f86a9165ee1',
@@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor):
             'duration': 1689.7,
             'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
             'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
-        }
+        },
     }, {
         'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
         'only_matching': True,
diff --git a/yt_dlp/extractor/audiodraft.py b/yt_dlp/extractor/audiodraft.py
index 71e5afd..484ad4e 100644
--- a/yt_dlp/extractor/audiodraft.py
+++ b/yt_dlp/extractor/audiodraft.py
@@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor):
             headers={
                 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
                 'X-Requested-With': 'XMLHttpRequest',
-            }, data=f'id={player_entry_id}'.encode('utf-8'))
+            }, data=f'id={player_entry_id}'.encode())
 
         return {
             'id': str(data_json['entry_id']),
@@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE):
     }]
 
     def _real_extract(self, url):
-        id = self._match_id(url)
-        webpage = self._download_webpage(url, id)
-        player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id')
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        player_entry_id = self._search_regex(
+            r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id')
 
         return self._audiodraft_extract_from_id(player_entry_id)
 
@@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE):
     }]
 
     def _real_extract(self, url):
-        id = self._match_id(url)
-        return self._audiodraft_extract_from_id(f'player_entry_{id}')
+        video_id = self._match_id(url)
+        return self._audiodraft_extract_from_id(f'player_entry_{video_id}')
diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py
index 5c4160f..1d4460c 100644
--- a/yt_dlp/extractor/audiomack.py
+++ b/yt_dlp/extractor/audiomack.py
@@ -3,7 +3,6 @@ import time
 
 from .common import InfoExtractor
 from .soundcloud import SoundcloudIE
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     url_basename,
@@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor):
             'id': '310086',
             'ext': 'mp3',
             'uploader': 'Roosh Williams',
-            'title': 'Extraordinary'
-        }
+            'title': 'Extraordinary',
+        },
     },
     # audiomack wrapper around soundcloud song
     # Needs new test URL.
@@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor):
 
         # API is inconsistent with errors
         if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
-            raise ExtractorError('Invalid url %s' % url)
+            raise ExtractorError(f'Invalid url {url}')
 
         # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
         # if so, pass the work off to the soundcloud extractor
@@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor):
             return self.url_result(api_response['url'], SoundcloudIE.ie_key())
 
         return {
-            'id': compat_str(api_response.get('id', album_url_tag)),
+            'id': str(api_response.get('id', album_url_tag)),
             'uploader': api_response.get('artist'),
             'title': api_response.get('title'),
             'url': api_response['url'],
@@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor):
         {
             'info_dict': {
                 'id': '812251',
-                'title': 'Tha Tour: Part 2 (Official Mixtape)'
-            }
+                'title': 'Tha Tour: Part 2 (Official Mixtape)',
+            },
         },
         # Album playlist ripped from fakeshoredrive with no metadata
         {
@@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor):
                     'id': '837576',
                     'ext': 'mp3',
                     'uploader': 'Lil Herb a.k.a. G Herbo',
-                }
+                },
             }, {
                 'info_dict': {
                     'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
                     'id': '837580',
                     'ext': 'mp3',
                     'uploader': 'Lil Herb a.k.a. G Herbo',
-                }
+                },
             }],
-        }
+        },
     ]
 
     def _real_extract(self, url):
@@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor):
             api_response = self._download_json(
                 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
                 % (album_url_tag, track_no, time.time()), album_url_tag,
-                note='Querying song information (%d)' % (track_no + 1))
+                note=f'Querying song information ({track_no + 1})')
 
             # Total failure, only occurs when url is totally wrong
             # Won't happen in middle of valid playlist (next case)
             if 'url' not in api_response or 'error' in api_response:
-                raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
+                raise ExtractorError(f'Invalid url for track {track_no} of album url {url}')
             # URL is good but song id doesn't exist - usually means end of playlist
             elif not api_response['url']:
                 break
@@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor):
                 # Pull out the album metadata and add to result (if it exists)
                 for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
                     if apikey in api_response and resultkey not in result:
-                        result[resultkey] = compat_str(api_response[apikey])
+                        result[resultkey] = str(api_response[apikey])
 
                 song_id = url_basename(api_response['url']).rpartition('.')[0]
                 result['entries'].append({
-                    'id': compat_str(api_response.get('id', song_id)),
+                    'id': str(api_response.get('id', song_id)),
                     'uploader': api_response.get('artist'),
                     'title': api_response.get('title', song_id),
                     'url': api_response['url'],
diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py
index 6448b44..c611c6e 100644
--- a/yt_dlp/extractor/audius.py
+++ b/yt_dlp/extractor/audius.py
@@ -1,7 +1,7 @@
 import random
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import compat_str, compat_urllib_parse_unquote
 from ..utils import ExtractorError, str_or_none, try_get
 
 
@@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor):
             if response_data is not None:
                 return response_data
             if len(response) == 1 and 'message' in response:
-                raise ExtractorError('API error: %s' % response['message'],
+                raise ExtractorError('API error: {}'.format(response['message']),
                                      expected=True)
         raise ExtractorError('Unexpected API response')
 
     def _select_api_base(self):
         """Selecting one of the currently available API hosts"""
-        response = super(AudiusBaseIE, self)._download_json(
+        response = super()._download_json(
             'https://api.audius.co/', None,
             note='Requesting available API hosts',
             errnote='Unable to request available API hosts')
@@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor):
         anything from this link, since the Audius API won't be able to
         resolve this url
         """
-        url = compat_urllib_parse_unquote(url)
-        title = compat_urllib_parse_unquote(title)
+        url = urllib.parse.unquote(url)
+        title = urllib.parse.unquote(title)
         if '/' in title or '%2F' in title:
             fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
             return url.replace(title, fixed_title)
@@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor):
         if self._API_BASE is None:
             self._select_api_base()
         try:
-            response = super(AudiusBaseIE, self)._download_json(
-                '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
+            response = super()._download_json(
+                f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
                 errnote=errnote, expected_status=expected_status)
         except ExtractorError as exc:
             # some of Audius API hosts may not work as expected and return HTML
-            if 'Failed to parse JSON' in compat_str(exc):
+            if 'Failed to parse JSON' in str(exc):
                 raise ExtractorError('An error occurred while receiving data. Try again',
                                      expected=True)
             raise exc
         return self._get_response_data(response)
 
     def _resolve_url(self, url, item_id):
-        return self._api_request('/resolve?url=%s' % url, item_id,
+        return self._api_request(f'/resolve?url={url}', item_id,
                                  expected_status=404)
 
@@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE):
             'view_count': int,
             'like_count': int,
             'repost_count': int,
-        }
+        },
     },
     {
         # Regular track
@@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE):
             'view_count': int,
             'like_count': int,
             'repost_count': int,
-        }
+        },
     },
     ]
 
     _ARTWORK_MAP = {
-        "150x150": 150,
-        "480x480": 480,
-        "1000x1000": 1000
+        '150x150': 150,
+        '480x480': 480,
+        '1000x1000': 1000,
     }
 
     def _real_extract(self, url):
@@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE):
         else:  # API link
             title = None
             # uploader = None
-            track_data = self._api_request('/tracks/%s' % track_id, track_id)
+            track_data = self._api_request(f'/tracks/{track_id}', track_id)
 
         if not isinstance(track_data, dict):
             raise ExtractorError('Unexpected API response')
@@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE):
         if isinstance(artworks_data, dict):
             for quality_key, thumbnail_url in artworks_data.items():
                 thumbnail = {
-                    "url": thumbnail_url
+                    'url': thumbnail_url,
                 }
                 quality_code = self._ARTWORK_MAP.get(quality_key)
                 if quality_code is not None:
@@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE):
         return {
             'id': track_id,
             'title': track_data.get('title', title),
-            'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
+            'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
             'ext': 'mp3',
             'description': track_data.get('description'),
             'duration': track_data.get('duration'),
             'track': track_data.get('title'),
-            'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
+            'artist': try_get(track_data, lambda x: x['user']['name'], str),
             'genre': track_data.get('genre'),
             'thumbnails': thumbnails,
             'view_count': track_data.get('play_count'),
@@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE):  # XXX: Do not subclass from concrete IE
     _TESTS = [
         {
             'url': 'audius:9RWlo',
-            'only_matching': True
+            'only_matching': True,
         },
         {
             'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
-            'only_matching': True
+            'only_matching': True,
         },
     ]
 
@@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
             if not track_id:
                 raise ExtractorError('Unable to get track ID from playlist')
             entries.append(self.url_result(
-                'audius:%s' % track_id,
+                f'audius:{track_id}',
                 ie=AudiusTrackIE.ie_key(), video_id=track_id))
         return entries
 
@@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
             raise ExtractorError('Unable to get playlist ID')
 
         playlist_tracks = self._api_request(
-            '/playlists/%s/tracks' % playlist_id,
+            f'/playlists/{playlist_id}/tracks',
             title, note='Downloading playlist tracks metadata',
             errnote='Unable to download playlist tracks metadata')
         if not isinstance(playlist_tracks, list):
@@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE):  # XXX: Do not subclass from concrete I
         profile_audius_id = _profile_data[0]['id']
         profile_bio = _profile_data[0].get('bio')
 
-        api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
+        api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
         return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py
index a8dfb3e..4066a5a 100644
--- a/yt_dlp/extractor/awaan.py
+++ b/yt_dlp/extractor/awaan.py
@@ -1,10 +1,7 @@
 import
base64 +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( format_field, int_or_none, @@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor): show_id, video_id, season_id = self._match_valid_url(url).groups() if video_id and int(video_id) > 0: return self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') + f'http://awaan.ae/media/{video_id}', 'AWAANVideo') elif season_id and int(season_id) > 0: return self.url_result(smuggle_url( - 'http://awaan.ae/program/season/%s' % season_id, + f'http://awaan.ae/program/season/{season_id}', {'show_id': show_id}), 'AWAANSeason') else: return self.url_result( - 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') + f'http://awaan.ae/program/{show_id}', 'AWAANSeason') class AWAANBaseIE(InfoExtractor): @@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE): video_id = self._match_id(url) video_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}', video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({ 'id': video_data['id'], 'user_id': video_data['user_id'], 'signature': video_data['signature'], @@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE): channel_id = self._match_id(url) channel_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}', channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ urllib.parse.urlencode({ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'signature': channel_data['signature'], @@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor): show_id = smuggled_data.get('show_id') if show_id is None: season = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}', season_id, headers={'Origin': 'http://awaan.ae'}) show_id = season['id'] data['show_id'] = show_id @@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor): 'http://admin.mangomolo.com/analytics/index.php/plus/show', show_id, data=urlencode_postdata(data), headers={ 'Origin': 'http://awaan.ae', - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }) if not season_id: season_id = show['default_season'] @@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor): entries = [] for video in show['videos']: - video_id = compat_str(video['id']) + video_id = str(video['id']) entries.append(self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) + f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id)) return self.playlist_result(entries, season_id, title) diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index 4ebef92..177c410 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -1,9 +1,9 @@ import datetime as dt import hashlib import hmac +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'Accept': 'application/json', 'Host': self._AWS_PROXY_HOST, 'X-Amz-Date': amz_date, - 'X-Api-Key': self._AWS_API_KEY + 'X-Api-Key': self._AWS_API_KEY, } session_token = aws_dict.get('session_token') if session_token: headers['X-Amz-Security-Token'] = session_token def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() + return hashlib.sha256(s.encode()).hexdigest() # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_querystring = urllib.parse.urlencode(query) canonical_headers = '' for header_name, header_value in sorted(headers.items()): - canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + canonical_headers += f'{header_name.lower()}:{header_value}\n' signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) canonical_request = '\n'.join([ 'GET', @@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with canonical_querystring, canonical_headers, signed_headers, - aws_hash('') + aws_hash(''), ]) # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html @@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + return hmac.new(key, msg.encode(), hashlib.sha256) def aws_hmac_digest(key, msg): return aws_hmac(key, msg).digest() @@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, 
base classes should end with def aws_hmac_hexdigest(key, msg): return aws_hmac(key, msg).hexdigest() - k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') + k_signing = ('AWS4' + aws_dict['secret_key']).encode() for value in credential_scope_list: k_signing = aws_hmac_digest(k_signing, value) @@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html headers['Authorization'] = ', '.join([ - '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), - 'SignedHeaders=%s' % signed_headers, - 'Signature=%s' % signature, + '{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + f'SignedHeaders={signed_headers}', + f'Signature={signature}', ]) return self._download_json( - 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + 'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), video_id, headers=headers) diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index d1686ee..0e3a03f 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor): 'timestamp': 1538328802, 'view_count': int, 'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031', - 'duration': 1930 + 'duration': 1930, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', - 'only_matching': True + 'only_matching': True, }] _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be' _PARTNER_ID = '1719221' @@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor): })['data']['context']['mainAsset']['video']['kaltura']['kalturaId'] return self.url_result( - 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + f'kaltura:{self._PARTNER_ID}:{entry_id}', ie=KalturaIE.ie_key(), video_id=entry_id) diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py index 8786d67..a1ad424 100644 --- a/yt_dlp/extractor/baidu.py +++ b/yt_dlp/extractor/baidu.py @@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor): }] def _call_api(self, path, category, playlist_id, note): - return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( - path, category, playlist_id), playlist_id, note) + return self._download_json( + f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}', + playlist_id, note) def _real_extract(self, url): category, playlist_id = self._match_valid_url(url).groups() @@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor): 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') entries = [self.url_result( - episode['url'], video_title=episode['title'] + episode['url'], video_title=episode['title'], ) for episode in episodes_detail['videos']] return self.playlist_result( diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index c4e07a7..d10bdf8 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,10 +1,7 @@ import math +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( InAdvancePagedList, format_field, @@ -20,8 
+17,8 @@ class BanByeBaseIE(InfoExtractor): @staticmethod def _extract_playlist_id(url, param='playlist'): - return compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get(param, [None])[0] + return urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get(param, [None])[0] def _extract_playlist(self, playlist_id): data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index e89b3a6..61cbab5 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -3,7 +3,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, @@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor): 'uploader_id': 'youtube-dl', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, - '_skip': 'There is a limit of 200 free downloads / month for the test song' + 'skip': 'There is a limit of 200 free downloads / month for the test song', }, { # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', @@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor): def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True): return self._parse_json(self._html_search_regex( - r'data-%s=(["\'])({.+?})\1' % attr, webpage, + rf'data-{attr}=(["\'])({{.+?}})\1', webpage, attr + ' data', group=2), video_id, fatal=fatal) def _real_extract(self, url): @@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor): download_link = tralbum.get('freeDownloadPage') if download_link: - track_id = compat_str(tralbum['id']) + track_id = str(tralbum['id']) download_webpage = self._download_webpage( download_link, track_id, 'Downloading free downloads page') @@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor): if isinstance(download_formats_list, list): for f in blob['download_formats']: name, ext = f.get('name'), f.get('file_extension') - if all(isinstance(x, compat_str) for x in (name, ext)): + if all(isinstance(x, str) for x in (name, ext)): download_formats[name] = ext.strip('.') for format_id, f in downloads.items(): @@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor): }) format_id = f.get('encoding_name') or format_id stat = self._download_json( - stat_url, track_id, 'Downloading %s JSON' % format_id, + stat_url, track_id, f'Downloading {format_id} JSON', transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], fatal=False) if not stat: @@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor): 'acodec': format_id.split('-')[0], }) - title = '%s - %s' % (artist, track) if artist else track + title = f'{artist} - {track}' if artist else track if not duration: duration = float_or_none(self._html_search_meta( @@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 1311756226, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, { 'md5': '1a2c32e2691474643e912cc6cd4bffaa', @@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 1311757238, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, ], 'info_dict': { @@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'blazo', }, 'params': { - 'playlistend': 2 + 'playlistend': 2, }, - 'skip': 'Bandcamp imposes download limits.' 
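Most hunks in this patch repeat one mechanical migration: the `..compat` aliases (`compat_str`, `compat_parse_qs`, `compat_urllib_parse_urlparse`, ...) become plain builtins and `urllib.parse`, and `%`-interpolation becomes f-strings. A minimal standalone sketch of the equivalences follows; the URL and names in it are illustrative, not taken from the patch:

import urllib.parse

# old: compat_str(track['id']); new: str(track['id'])
track = {'id': 310086, 'slug': 'extraordinary'}
video_id = str(track['id'])

# old: 'http://example.com/track/%s' % video_id; new: f-string
url = f'http://example.com/track/{video_id}?playlist=demo'

# old: compat_parse_qs(compat_urllib_parse_urlparse(url).query)
# new: the same helpers, straight from the stdlib
playlist = urllib.parse.parse_qs(urllib.parse.urlparse(url).query).get('playlist', [None])[0]
assert (video_id, playlist) == ('310086', 'demo')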
+ 'skip': 'Bandcamp imposes download limits.', }, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { @@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): return (False if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) - else super(BandcampAlbumIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): uploader_id, album_id = self._match_valid_url(url).groups() @@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE }, }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE title = show.get('audio_title') or 'Bandcamp Weekly' subtitle = show.get('subtitle') if subtitle: - title += ' - %s' % subtitle + title += f' - {subtitle}' return { 'id': show_id, @@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), 'episode_id': show_id, - 'formats': formats + 'formats': formats, } @@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor): 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'id': 'dotscale', - 'title': 'Discography of dotscale' + 'title': 'Discography of dotscale', }, 'playlist_count': 1, }, { diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 82dc9ab..46f2978 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor): 'description': 'md5:560d96f02abbebe6c6b78b47465f6b28', 'upload_date': '20200324', 'timestamp': 1585087895, - } + }, }] _GRAPHQL_GETMETADATA_QUERY = ''' @@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) { 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY, } - def _call_api(self, video_id, id, operation, note): + def _call_api(self, video_id, id_var, operation, note): return self._download_json( 'https://api.infowarsmedia.com/graphql', video_id, note=note, headers={ - 'Content-Type': 'application/json; charset=utf-8' + 'Content-Type': 'application/json; charset=utf-8', }, data=json.dumps({ - 'variables': {'id': id}, + 'variables': {'id': id_var}, 'operationName': operation, - 'query': self._GRAPHQL_QUERIES[operation] + 'query': self._GRAPHQL_QUERIES[operation], }).encode('utf8')).get('data') def _get_comments(self, video_id, comments, comment_data): @@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) 
{ 'tags': [tag.get('name') for tag in video_info.get('tags')], 'availability': self._availability(is_unlisted=video_info.get('unlisted')), 'comments': comments, - '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')) + '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')), } diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index f6b58b3..3af923f 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -2,10 +2,10 @@ import functools import itertools import json import re +import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ (?: @@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor): radio/player/| events/[^/]+/play/[^/]+/ ) - (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) - ''' % _ID_REGEX + (?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips)) + ''' _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)'] _LOGIN_URL = 'https://account.bbc.com/signin' @@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', @@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', 'note': 'Video', @@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', 'info_dict': { @@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor): error = clean_html(get_element_by_class('form-message', response)) if error: raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') class MediaSelectionError(Exception): - def __init__(self, id): - self.id = id + def __init__(self, error_id): + self.id = error_id def _extract_asx_playlist(self, connection, programme_id): asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') return [ref.get('href') for ref in asx.findall('./Entry/ref')] def _extract_items(self, playlist): - return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) + return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item') def _extract_medias(self, media_selection): error = media_selection.get('result') @@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor): def _raise_extractor_error(self, media_selection_error): raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, media_selection_error.id), + f'{self.IE_NAME} returned error: {media_selection_error.id}', expected=True) def _download_media_selector(self, programme_id): @@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor): for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): formats.append({ 'url': ref, - 'format_id': 'ref%s_%s' % (i, format_id), + 'format_id': f'ref{i}_{format_id}', }) elif transfer_format == 'dash': formats.extend(self._extract_mpd_formats( 
@@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor): href, programme_id, f4m_id=format_id, fatal=False)) else: if not supplier and bitrate: - format_id += '-%d' % bitrate + format_id += f'-{bitrate}' fmt = { 'format_id': format_id, 'filesize': file_size, @@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor): identifier = connection.get('identifier') server = connection.get('server') fmt.update({ - 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'url': f'{protocol}://{server}/{application}?{auth_string}', 'play_path': identifier, - 'app': '%s?%s' % (application, auth_string), + 'app': f'{application}?{auth_string}', 'page_url': 'http://www.bbc.co.uk', 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', 'rtmp_live': False, @@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor): def _download_playlist(self, playlist_id): try: playlist = self._download_json( - 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, + f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json', playlist_id, 'Downloading playlist JSON') formats = [] subtitles = {} @@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor): def _process_legacy_playlist(self, playlist_id): return self._process_legacy_playlist_url( - 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id) + f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id) def _download_legacy_playlist_url(self, url, playlist_id=None): return self._download_xml( url, playlist_id, 'Downloading legacy playlist XML') def _extract_from_legacy_playlist(self, playlist, playlist_id): - no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS) + no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems') if no_items is not None: reason = no_items.get('reason') if reason == 'preAvailability': - msg = 'Episode %s is not yet available' % playlist_id + msg = f'Episode {playlist_id} is not yet available' elif reason == 'postAvailability': - msg = 'Episode %s is no longer available' % playlist_id + msg = f'Episode {playlist_id} is no longer available' elif reason == 'noMedia': - msg = 'Episode %s is not currently available' % playlist_id + msg = f'Episode {playlist_id} is not currently available' else: - msg = 'Episode %s is not available: %s' % (playlist_id, reason) + msg = f'Episode {playlist_id} is not available: {reason}' raise ExtractorError(msg, expected=True) for item in self._extract_items(playlist): kind = item.get('kind') if kind not in ('programme', 'radioProgramme'): continue - title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text - description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) + title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text + description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary') description = description_el.text if description_el is not None else None def get_programme_id(item): @@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor): if value and re.match(r'^[pb][\da-z]{7}$', value): return value get_from_attributes(item) - mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS) + mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator') if mediator is not None: return get_from_attributes(mediator) @@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor): if not programme_id: programme_id = self._search_regex( - r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None) + rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', 
fatal=False, default=None) if programme_id: formats, subtitles = self._download_media_selector(programme_id) @@ -641,7 +641,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE }, 'params': { 'skip_download': True, - } + }, }, { # article with single video embedded with data-playable containing XML playlist # with direct video links as progressiveDownloadUrl (for now these are extracted) @@ -884,7 +884,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'bbc_world_service', 'series': 'CrowdScience', 'chapters': [], - } + }, }, { # onion routes 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', 'only_matching': True, @@ -897,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE) return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) - else super(BBCIE, cls).suitable(url)) + else super().suitable(url)) def _extract_from_media_meta(self, media_meta, video_id): # Direct links to media in media metadata (e.g. @@ -1009,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if playlist: entry = None for key in ('streaming', 'progressiveDownload'): - playlist_url = playlist.get('%sUrl' % key) + playlist_url = playlist.get(f'{key}Url') if not playlist_url: continue try: @@ -1035,7 +1035,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227 group_id = self._search_regex( - r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, + rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})', webpage, 'group id', default=None) if group_id: return self.url_result( @@ -1043,9 +1043,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # single video story (e.g. 
http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, - r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, - r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], + [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"', + rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"', + rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'], webpage, 'vpid', default=None) if programme_id: @@ -1142,7 +1142,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id, url_transparent=True) entry.update({ 'timestamp': traverse_obj(morph_payload, ( - 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}) + 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}), ), **traverse_obj(video_data, { 'thumbnail': (('iChefImage', 'image'), {url_or_none}, any), @@ -1189,7 +1189,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}), 'start_time': ('offset', 'start', {float_or_none}), 'end_time': ('offset', 'end', {float_or_none}), - }) + }), ), } @@ -1287,7 +1287,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'duration': ('versions', 0, 'duration', {int}), 'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), - }) + }), } def is_type(*types): @@ -1331,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if blocks: summary = [] for block in blocks: - text = try_get(block, lambda x: x['model']['text'], compat_str) + text = try_get(block, lambda x: x['model']['text'], str) if text: summary.append(text) if summary: @@ -1411,9 +1411,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE entries, playlist_id, playlist_title, playlist_description) def extract_all(pattern): - return list(filter(None, map( - lambda s: self._parse_json(s, playlist_id, fatal=False), - re.findall(pattern, webpage)))) + return list(filter(None, ( + self._parse_json(s, playlist_id, fatal=False) + for s in re.findall(pattern, webpage)))) # US accessed article with single embedded video (e.g. # https://www.bbc.com/news/uk-68546268) @@ -1435,14 +1435,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # Multiple video article (e.g. # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) - EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX + EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?' 
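The BBC conversions above show the one subtlety of moving `%`-templated regexes to rf-strings: interpolation fields and literal regex braces now share the same syntax, so literal braces must be doubled (as in `({{.+?}})` in the bandcamp hunk, or the triple-brace `f'./{{{self._EMP_PLAYLIST_NS}}}item'` namespace lookups). A self-contained sketch, reusing the `_ID_REGEX` value from the BBCCoUkIE hunk; the namespace value is a placeholder, since `_EMP_PLAYLIST_NS` itself is not shown in this patch:

import re

_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'  # from the BBCCoUkIE hunk

# {_ID_REGEX} interpolates; the {7} quantifiers live inside the variable,
# so nothing in this literal needs escaping
pattern = rf'data-pid=["\']({_ID_REGEX})'
assert re.search(pattern, 'data-pid="b054fn09"').group(1) == 'b054fn09'

# an ElementTree namespace lookup needs three braces:
# '{{' -> literal '{', '{ns}' -> the interpolated value
ns = 'urn:example:playlist'  # placeholder namespace
assert f'./{{{ns}}}item' == './{urn:example:playlist}item'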
entries = [] for match in extract_all(r'new\s+SMP\(({.+?})\)'): embed_url = match.get('playerSettings', {}).get('externalEmbedUrl') if embed_url and re.match(EMBED_URL, embed_url): entries.append(embed_url) entries.extend(re.findall( - r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) + rf'setPlaylist\("({EMBED_URL})"\)', webpage)) if entries: return self.playlist_result( [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], @@ -1492,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id = media_meta.get('externalId') if not video_id: - video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num) + video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}' title = media_meta.get('caption') if not title: - title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num) + title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}' duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration')) @@ -1557,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor): class BBCCoUkPlaylistBaseIE(InfoExtractor): def _entries(self, webpage, url, playlist_id): - single_page = 'page' in compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query) + single_page = 'page' in urllib.parse.parse_qs( + urllib.parse.urlparse(url).query) for page_num in itertools.count(2): for video_id in re.findall( self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): @@ -1572,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): if not next_page: break webpage = self._download_webpage( - compat_urlparse.urljoin(url, next_page), playlist_id, - 'Downloading page %d' % page_num, page_num) + urllib.parse.urljoin(url, next_page), playlist_id, + f'Downloading page {page_num}', page_num) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -1588,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})' @staticmethod def _get_default(episode, key, default_key='default'): @@ -1712,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE): variables['sliceId'] = series_id return self._download_json( 'https://graph.ibl.api.bbc.co.uk/', pid, headers={ - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', }, data=json.dumps({ 'id': '5692d93d5aac8d796a0305e895e61551', 'variables': variables, - }).encode('utf-8'))['data']['programme'] + }).encode())['data']['programme'] @staticmethod def _get_playlist_data(data): @@ -1776,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): def _call_api(self, pid, per_page, page=1, series_id=None): return self._download_json( - 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid, + f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes', pid, query={ 'page': page, 'per_page': per_page, @@ -1792,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): IE_NAME = 'bbc.co.uk:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX + _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)' _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' 
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' _TESTS = [{ diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index 0aecbd0..acc8d12 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import int_or_none @@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor): 'display_id': 'birds-original-mix', 'ext': 'mp4', 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", - } + }, }] def _real_extract(self, url): @@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor): track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) - title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name'] if track['mix']: title += ' (' + track['mix'] + ')' @@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor): images.append(image) return { - 'id': compat_str(track.get('id')) or track_id, + 'id': str(track.get('id')) or track_id, 'display_id': track.get('slug') or display_id, 'title': title, 'formats': formats, diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index da98ac3..960cdfa 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -23,7 +23,7 @@ class BeegIE(InfoExtractor): 'upload_date': '20220131', 'timestamp': 1643656455, 'display_id': '2540839', - } + }, }, { 'url': 'https://beeg.com/-0599050563103750?t=4-861', 'md5': 'bd8b5ea75134f7f07fad63008db2060e', @@ -38,7 +38,7 @@ class BeegIE(InfoExtractor): 'timestamp': 1643623200, 'display_id': '2569965', 'upload_date': '20220131', - } + }, }, { # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', @@ -55,8 +55,8 @@ class BeegIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video = self._download_json( - 'https://store.externulls.com/facts/file/%s' % video_id, - video_id, 'Downloading JSON for %s' % video_id) + f'https://store.externulls.com/facts/file/{video_id}', + video_id, f'Downloading JSON for {video_id}') fc_facts = video.get('fc_facts') first_fact = {} diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 9d2324f..45f45d0 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor): 'upload_date': '20141205', 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 677680b..ac45dd4 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor): return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), + 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', 'ie_key': 'NineCNineMedia', } diff --git a/yt_dlp/extractor/berufetv.py b/yt_dlp/extractor/berufetv.py index 8160cbd..5bba33a 100644 --- a/yt_dlp/extractor/berufetv.py +++ b/yt_dlp/extractor/berufetv.py @@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor): 'tags': ['Studienfilm'], 'duration': 602.440, 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$', - } + }, }] def _real_extract(self, url): @@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor): 
subtitles.setdefault(track['language'], []).append({ 'url': track['source'], 'name': track.get('label'), - 'ext': 'vtt' + 'ext': 'vtt', }) return { diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index cbf3dd0..3a8e743 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download @@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download 'skip_download': True, }, - } + }, ] - _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player' def _get_feed_query(self, uri): return { diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index c4621ca..87f0117 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE): 'timestamp': 1673341692, 'duration': 109.269, 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'], - 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg' - } + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 02d1ba0..9c55bb9 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,10 +1,8 @@ +import base64 import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) class BigflixIE(InfoExtractor): @@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { # multiple formats 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', @@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor): webpage, 'title') def decode_url(quoted_b64_url): - return compat_b64decode(compat_urllib_parse_unquote( + return base64.b64decode(urllib.parse.unquote( quoted_b64_url)).decode('utf-8') formats = [] @@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor): video_url = decode_url(encoded_url) f = { 'url': video_url, - 'format_id': '%sp' % height, + 'format_id': f'{height}p', 'height': int(height), } if video_url.startswith('rtmp'): @@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor): 'id': video_id, 'title': title, 'description': description, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index acf78e4..b1c230f 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -36,7 +36,7 @@ class BigoIE(InfoExtractor): raise ExtractorError('Received invalid JSON data') if info_raw.get('code'): raise ExtractorError( - 'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True) + 'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True) info = info_raw.get('data') or {} if not info.get('alive'): diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index eb28932..2ba6370 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -20,7 
+20,7 @@ class BildIE(InfoExtractor): 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 196, - } + }, }, { 'note': 'static MP4 and HLS', 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html', @@ -32,7 +32,7 @@ class BildIE(InfoExtractor): 'description': 'md5:709b543c24dc31bbbffee73bccda34ad', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 69, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b38c90b..a84b7a6 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -31,12 +31,12 @@ from ..utils import ( mimetype2ext, parse_count, parse_qs, + parse_resolution, qualities, smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, - try_call, unified_timestamp, unsmuggle_url, url_or_none, @@ -47,6 +47,23 @@ from ..utils import ( class BilibiliBaseIE(InfoExtractor): _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?') + _WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session + _wbi_key_cache = {} + + @property + def is_logged_in(self): + return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA')) + + def _check_missing_formats(self, play_info, formats): + parsed_qualities = set(traverse_obj(formats, (..., 'quality'))) + missing_formats = join_nonempty(*[ + traverse_obj(fmt, 'new_description', 'display_desc', 'quality') + for fmt in traverse_obj(play_info, ( + 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ') + if missing_formats: + self.to_screen( + f'Format(s) {missing_formats} are missing; you have to login or ' + f'become a premium member to download them. {self._login_hint()}') def extract_formats(self, play_info): format_names = { @@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor): 'format': format_names.get(video.get('id')), } for video in traverse_obj(play_info, ('dash', 'video', ...))) - missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality'))) - if missing_formats: - self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; ' - f'you have to login or become premium member to download them. 
{self._login_hint()}') + if formats: + self._check_missing_formats(play_info, formats) + fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), { + 'url': ('url', {url_or_none}), + 'duration': ('length', {functools.partial(float_or_none, scale=1000)}), + 'filesize': ('size', {int_or_none}), + })) + if fragments: + formats.append({ + 'url': fragments[0]['url'], + 'filesize': sum(traverse_obj(fragments, (..., 'filesize'))), + **({ + 'fragments': fragments, + 'protocol': 'http_dash_segments', + } if len(fragments) > 1 else {}), + **traverse_obj(play_info, { + 'quality': ('quality', {int_or_none}), + 'format_id': ('quality', {str_or_none}), + 'format_note': ('quality', {lambda x: format_names.get(x)}), + 'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}), + }), + **parse_resolution(format_names.get(play_info.get('quality'))), + }) return formats - def _download_playinfo(self, video_id, cid, headers=None): + def _get_wbi_key(self, video_id): + if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT: + return self._wbi_key_cache['key'] + + session_data = self._download_json( + 'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign') + + lookup = ''.join(traverse_obj(session_data, ( + 'data', 'wbi_img', ('img_url', 'sub_url'), + {lambda x: x.rpartition('/')[2].partition('.')[0]}))) + + # from getMixinKey() in the vendor js + mixin_key_enc_tab = [ + 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, + 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, + 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, + 36, 20, 34, 44, 52, + ] + + self._wbi_key_cache.update({ + 'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32], + 'ts': time.time(), + }) + return self._wbi_key_cache['key'] + + def _sign_wbi(self, params, video_id): + params['wts'] = round(time.time()) + params = { + k: ''.join(filter(lambda char: char not in "!'()*", str(v))) + for k, v in sorted(params.items()) + } + query = urllib.parse.urlencode(params) + params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() + return params + + def _download_playinfo(self, bvid, cid, headers=None, qn=None): + params = {'bvid': bvid, 'cid': cid, 'fnval': 4048} + if qn: + params['qn'] = qn return self._download_json( - 'https://api.bilibili.com/x/player/playurl', video_id, - query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, - note=f'Downloading video formats for cid {cid}', headers=headers)['data'] + 'https://api.bilibili.com/x/player/wbi/playurl', bvid, + query=self._sign_wbi(params, bvid), headers=headers, + note=f'Downloading video formats for cid {cid} {qn or ""}')['data'] def json2srt(self, json_data): srt_data = '' @@ -112,21 +186,21 @@ class BilibiliBaseIE(InfoExtractor): 'danmaku': [{ 'ext': 'xml', 'url': f'https://comment.bilibili.com/{cid}.xml', - }] + }], } - subtitle_info = traverse_obj(self._download_json( + video_info = self._download_json( 'https://api.bilibili.com/x/player/v2', video_id, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, - note=f'Extracting subtitle info {cid}'), ('data', 'subtitle')) - subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan'])) - if not subs_list and traverse_obj(subtitle_info, 'allow_submit'): - if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie - self.report_warning(f'CC subtitles 
(if any) are only visible when logged in. {self._login_hint()}', only_once=True) - for s in subs_list: + note=f'Extracting subtitle info {cid}') + if traverse_obj(video_info, ('data', 'need_login_subtitle')): + self.report_warning( + f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True) + for s in traverse_obj(video_info, ( + 'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])): subtitles.setdefault(s['lan'], []).append({ 'ext': 'srt', - 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) + 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)), }) return subtitles @@ -203,19 +277,19 @@ class BilibiliBaseIE(InfoExtractor): self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges) return cid_edges - def _get_interactive_entries(self, video_id, cid, metainfo): + def _get_interactive_entries(self, video_id, cid, metainfo, headers=None): graph_version = traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/wbi/v2', video_id, - 'Extracting graph version', query={'bvid': video_id, 'cid': cid}), + 'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers), ('data', 'interaction', 'graph_version', {int_or_none})) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) for cid, edges in cid_edges.items(): - play_info = self._download_playinfo(video_id, cid) + play_info = self._download_playinfo(video_id, cid, headers=headers) yield { **metainfo, 'id': f'{video_id}_{cid}', - 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}', + 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}', 'formats': self.extract_formats(play_info), 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}', 'duration': float_or_none(play_info.get('timelength'), scale=1000), @@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE): 'timestamp': 1488353834, 'like_count': int, 'view_count': int, + '_old_archive_ids': ['bilibili 8903802_part1'], }, }, { 'note': 'old av URL version', 'url': 'http://www.bilibili.com/video/av1074402/', 'info_dict': { - 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', + 'id': 'BV11x411K7CN', 'ext': 'mp4', + 'title': '【金坷垃】金泡沫', 'uploader': '菊子桑', 'uploader_id': '156160', - 'id': 'BV11x411K7CN', - 'title': '【金坷垃】金泡沫', 'duration': 308.36, 'upload_date': '20140420', 'timestamp': 1397983878, @@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE): 'comment_count': int, 'view_count': int, 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', + '_old_archive_ids': ['bilibili 1074402_part1'], }, 'params': {'skip_download': True}, }, { @@ -269,7 +345,7 @@ class BiliBiliIE(BilibiliBaseIE): 'url': 'https://www.bilibili.com/video/BV1bK411W797', 'info_dict': { 'id': 'BV1bK411W797', - 'title': '物语中的人物是如何吐槽自己的OP的' + 'title': '物语中的人物是如何吐槽自己的OP的', }, 'playlist_count': 18, 'playlist': [{ @@ -288,8 +364,9 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }] + '_old_archive_ids': ['bilibili 498159642_part1'], + }, + }], }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }, { - 'note': 'video has subtitles', - 'url': 
'https://www.bilibili.com/video/BV12N4y1M7rh', - 'info_dict': { - 'id': 'BV12N4y1M7rh', - 'ext': 'mp4', - 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', - 'tags': list, - 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', - 'duration': 313.557, - 'upload_date': '20220709', - 'uploader': '小夫太渴', - 'timestamp': 1657347907, - 'uploader_id': '1326814124', - 'comment_count': int, - 'view_count': int, - 'like_count': int, - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', - 'subtitles': 'count:2' + '_old_archive_ids': ['bilibili 498159642_part1'], }, - 'params': {'listsubtitles': True}, }, { 'url': 'https://www.bilibili.com/video/av8903802/', 'info_dict': { @@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE): 'comment_count': int, 'view_count': int, 'like_count': int, + '_old_archive_ids': ['bilibili 8903802_part1'], }, 'params': { 'skip_download': True, @@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 463665680_part1'], }, 'params': {'skip_download': True}, }, { @@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 893839363_part1'], }, - 'params': {'skip_download': True}, }, { 'note': 'newer festival video', 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f', @@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 778246196_part1'], + }, + }, { + 'note': 'legacy flv/mp4 video', + 'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4', + 'info_dict': { + 'id': 'BV1ms411Q7vw_p4', + 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛', + 'timestamp': 1458222815, + 'upload_date': '20160317', + 'description': '云南方言快乐生产线出品', + 'duration': float, + 'uploader': '一笑颠天', + 'uploader_id': '3916081', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 4120229_part4'], + }, + 'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}}, + 'playlist_count': 19, + 'playlist': [{ + 'info_dict': { + 'id': 'BV1ms411Q7vw_p4_0', + 'ext': 'flv', + 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛', + 'duration': 399.102, + }, + }], + }, { + 'note': 'legacy mp4-only video', + 'url': 'https://www.bilibili.com/video/BV1nx411u79K', + 'info_dict': { + 'id': 'BV1nx411u79K', + 'ext': 'mp4', + 'title': '【练习室】201603声乐练习《No Air》with VigoVan', + 'timestamp': 1508893551, + 'upload_date': '20171025', + 'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van', + 'duration': 80.384, + 'uploader': '伯远', + 'uploader_id': '10584494', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 15700301_part1'], }, - 'params': {'skip_download': True}, }, { 'note': 'interactive/split-path video', 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/', @@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 292734508_part1'], }, 'playlist_count': 33, 'playlist': [{ @@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': 
r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 292734508_part1'], }, }], }, { @@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE): 'upload_date': '20191021', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, + }, { + 'note': 'video has subtitles, which requires login', + 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', + 'info_dict': { + 'id': 'BV12N4y1M7rh', + 'ext': 'mp4', + 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', + 'tags': list, + 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', + 'duration': 313.557, + 'upload_date': '20220709', + 'uploader': '小夫太渴', + 'timestamp': 1657347907, + 'uploader_id': '1326814124', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'subtitles': 'count:2', # login required for CC subtitle + '_old_archive_ids': ['bilibili 898179753_part1'], + }, + 'params': {'listsubtitles': True}, + 'skip': 'login required for subtitle', }, { 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/', 'info_dict': { @@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE): if not self._match_valid_url(urlh.url): return self.url_result(urlh.url) - initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + headers['Referer'] = url + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) is_festival = 'videoData' not in initial_state if is_festival: video_data = initial_state['videoInfo'] @@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE): aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') - cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') festival_info = {} @@ -586,19 +719,65 @@ class BiliBiliIE(BilibiliBaseIE): is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate')) if is_interactive: return self.playlist_result( - self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{ - 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), - '__post_extractor': self.extract_comments(aid), - }) + self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo, + duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), + __post_extractor=self.extract_comments(aid)) else: - return { - **metainfo, - 'duration': float_or_none(play_info.get('timelength'), scale=1000), - 'chapters': self._get_chapters(aid, cid), - 'subtitles': self.extract_subtitles(video_id, cid), - 'formats': self.extract_formats(play_info), - '__post_extractor': self.extract_comments(aid), - } + formats = self.extract_formats(play_info) + + if not traverse_obj(play_info, ('dash')): + # we only have legacy formats and need additional work + has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) + for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): + formats.extend(traverse_obj( + self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)), + lambda _, v: not has_qn(v['quality']))) + self._check_missing_formats(play_info, formats) + flv_formats = traverse_obj(formats, lambda _, v: v['fragments']) + if flv_formats and len(flv_formats) < len(formats): + # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one + if not self._configuration_arg('prefer_multi_flv'): + dropped_fmts = ', '.join( + f'{f.get("format_note")} ({f.get("format_id")})' for f in 
+                        dropped_fmts = ', '.join(
+                            f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
+                        formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
+                        if dropped_fmts:
+                            self.to_screen(
+                                f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
+                                'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
+                    else:
+                        formats = traverse_obj(
+                            # XXX: Filtering by extractor-arg is for testing purposes
+                            formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
+                        ) or [max(flv_formats, key=lambda x: x['quality'])]
+
+            if traverse_obj(formats, (0, 'fragments')):
+                # We have flv formats, which are individual short videos with their own timestamps and metainfo
+                # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
+                return {
+                    **metainfo,
+                    '_type': 'multi_video',
+                    'entries': [{
+                        'id': f'{metainfo["id"]}_{idx}',
+                        'title': metainfo['title'],
+                        'http_headers': metainfo['http_headers'],
+                        'formats': [{
+                            **fragment,
+                            'format_id': formats[0].get('format_id'),
+                        }],
+                        'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
+                        '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
+                    } for idx, fragment in enumerate(formats[0]['fragments'])],
+                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
+                }
+            else:
+                return {
+                    **metainfo,
+                    'formats': formats,
+                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
+                    'chapters': self._get_chapters(aid, cid),
+                    'subtitles': self.extract_subtitles(video_id, cid),
+                    '__post_extractor': self.extract_comments(aid),
+                }
 
 
 class BiliBiliBangumiIE(BilibiliBaseIE):
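The `multi_video` branch above is the heart of the legacy-flv handling: each flv fragment is a self-contained short video, so binary concatenation would corrupt timestamps. A minimal sketch of the same shape, with `as_multi_video` as an illustrative helper name that is not part of this patch:

def as_multi_video(metainfo, fmt, subtitles=None, comments=None):
    # Wrap each flv fragment as its own entry; only the first entry
    # carries subtitles/comments so they are not duplicated per part.
    return {
        **metainfo,
        '_type': 'multi_video',
        'entries': [{
            'id': f'{metainfo["id"]}_{idx}',
            'title': metainfo['title'],
            'formats': [{**fragment, 'format_id': fmt.get('format_id')}],
            'subtitles': subtitles if idx == 0 else None,
            '__post_extractor': comments if idx == 0 else None,
        } for idx, fragment in enumerate(fmt['fragments'])],
    }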
@@ -640,7 +819,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
             'duration': 1425.256,
             'timestamp': 1554566400,
             'upload_date': '20190406',
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
         },
         'skip': 'Geo-restricted',
     }, {
@@ -661,7 +840,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
             'duration': 1922.129,
             'timestamp': 1602853860,
             'upload_date': '20201016',
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
         },
     }]
 
@@ -764,7 +943,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
             'duration': 1525.777,
             'timestamp': 1425074413,
             'upload_date': '20150227',
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
         },
     }],
 }]
@@ -794,7 +973,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
             'title': '鬼灭之刃',
             'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
         },
-        'playlist_mincount': 26
+        'playlist_mincount': 26,
     }, {
         'url': 'https://www.bilibili.com/bangumi/play/ss2251',
         'info_dict': {
@@ -819,7 +998,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
             'duration': 1436.992,
             'timestamp': 1343185080,
             'upload_date': '20120725',
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
         },
     }],
 }]
@@ -906,7 +1085,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE):
             'upload_date': '20230924',
             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
             'view_count': int,
-        }
+        },
     }]
 
     def _real_extract(self, url):
@@ -939,7 +1118,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
                 'upload_date': '20230924',
                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                 'view_count': int,
-            }
+            },
         }],
         'params': {'playlist_items': '1'},
     }, {
@@ -969,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
         }))
 
 
-class BilibiliSpaceBaseIE(InfoExtractor):
+class BilibiliSpaceBaseIE(BilibiliBaseIE):
     def _extract_playlist(self, fetch_page, get_metadata, get_entries):
         first_page = fetch_page(0)
         metadata = get_metadata(first_page)
@@ -989,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
             'id': '3985676',
         },
         'playlist_mincount': 178,
+        'skip': 'login required',
     }, {
         'url': 'https://space.bilibili.com/313580179/video',
         'info_dict': {
             'id': '313580179',
         },
         'playlist_mincount': 92,
+        'skip': 'login required',
     }]
 
-    def _extract_signature(self, playlist_id):
-        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
-
-        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
-        img_key = traverse_obj(
-            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
-        sub_key = traverse_obj(
-            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
-
-        session_key = img_key + sub_key
-
-        signature_values = []
-        for position in (
-            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
-            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
-            57, 62, 11, 36, 20, 34, 44, 52
-        ):
-            char_at_position = try_call(lambda: session_key[position])
-            if char_at_position:
-                signature_values.append(char_at_position)
-
-        return ''.join(signature_values)[:32]
-
     def _real_extract(self, url):
         playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
         if not is_video_url:
             self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                            'To download audios, add a "/audio" to the URL')
 
-        signature = self._extract_signature(playlist_id)
-
         def fetch_page(page_idx):
             query = {
                 'keyword': '',
                 'mid': playlist_id,
-                'order': 'pubdate',
+                'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
                 'order_avoided': 'true',
                 'platform': 'web',
                 'pn': page_idx + 1,
                 'ps': 30,
                 'tid': 0,
                 'web_location': 1550101,
-                'wts': int(time.time()),
             }
-            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
 
             try:
-                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
-                                               playlist_id, note=f'Downloading page {page_idx}', query=query,
-                                               headers={'referer': url})
+                response = self._download_json(
+                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
+                    query=self._sign_wbi(query, playlist_id),
+                    note=f'Downloading space page {page_idx}', headers={'Referer': url})
             except ExtractorError as e:
                 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                     raise ExtractorError(
                         'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                 raise
-            if response['code'] in (-352, -401):
+            status_code = response['code']
+            if status_code == -401:
                 raise ExtractorError(
-                    f'Request is blocked by server ({-response["code"]}), '
-                    'please add cookies, wait and try later.', expected=True)
+                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+            elif status_code == -352 and not self.is_logged_in:
+                self.raise_login_required('Request is rejected, you need to login to access playlist')
+            elif status_code != 0:
+                raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
             return response['data']
 
         def get_metadata(page_data):
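For context on the `_sign_wbi` call that replaces the removed `_extract_signature` helper: bilibili's WBI scheme derives a 32-character mixin key by reordering the characters of the concatenated `img_key`/`sub_key` (the reorder table is visible in the removed code above) and signs the urlencoded query with MD5. A standalone sketch, assuming `_sign_wbi` in the base class centralizes the same steps and that the query is already in the key order the API expects:

import hashlib
import time
import urllib.parse

# Reorder table copied from the removed _extract_signature above
MIXIN_KEY_ENC_TAB = [
    46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
    33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61,
    26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36,
    20, 34, 44, 52,
]

def sign_wbi(query, img_key, sub_key):
    session_key = img_key + sub_key
    mixin_key = ''.join(session_key[pos] for pos in MIXIN_KEY_ENC_TAB)[:32]
    query = dict(query, wts=int(time.time()))
    # The urlencoded query (including wts) is salted with the mixin key
    query['w_rid'] = hashlib.md5(
        (urllib.parse.urlencode(query) + mixin_key).encode()).hexdigest()
    return query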
@@ -1163,7 +1322,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
                 'uploader_id': ('meta', 'mid', {str_or_none}),
                 'timestamp': ('meta', 'ptime', {int_or_none}),
                 'thumbnail': ('meta', 'cover', {url_or_none}),
-            })
+            }),
         }
 
         def get_entries(page_data):
@@ -1195,7 +1354,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
         mid, sid = self._match_valid_url(url).group('mid', 'sid')
         playlist_id = f'{mid}_{sid}'
         playlist_meta = traverse_obj(self._download_json(
-            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
+            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
         ), {
             'title': ('data', 'meta', 'name', {str}),
             'description': ('data', 'meta', 'description', {str}),
@@ -1217,7 +1376,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
             'page_count': math.ceil(entry_count / page_size),
             'page_size': page_size,
             'uploader': self._get_uploader(mid, playlist_id),
-            **playlist_meta
+            **playlist_meta,
         }
 
         def get_entries(page_data):
@@ -1241,7 +1400,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
             'upload_date': '20201109',
             'modified_timestamp': int,
             'modified_date': str,
-            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
             'view_count': int,
             'like_count': int,
         },
@@ -1281,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
     _TESTS = [{
         'url': 'https://www.bilibili.com/watchlater/#/list',
-        'info_dict': {'id': 'watchlater'},
+        'info_dict': {
+            'id': r're:\d+',
+            'title': '稍后再看',
+        },
         'playlist_mincount': 0,
         'skip': 'login required',
     }]
@@ -1345,7 +1507,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
             'uploader_id': '84912',
             'timestamp': 1604905176,
             'upload_date': '20201109',
-            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
         },
         'playlist_mincount': 22,
     }, {
@@ -1357,21 +1519,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
         'skip': 'redirect url',
     }, {
         'url': 'https://www.bilibili.com/list/watchlater',
-        'info_dict': {'id': 'watchlater'},
+        'info_dict': {
+            'id': r're:2_\d+',
+            'title': '稍后再看',
+            'uploader': str,
+            'uploader_id': str,
+        },
         'playlist_mincount': 0,
         'skip': 'login required',
     }, {
         'url': 'https://www.bilibili.com/medialist/play/watchlater',
         'info_dict': {'id': 'watchlater'},
         'playlist_mincount': 0,
-        'skip': 'login required',
+        'skip': 'redirect url & login required',
     }]
 
     def _extract_medialist(self, query, list_id):
         for page_num in itertools.count(1):
             page_data = self._download_json(
                 'https://api.bilibili.com/x/v2/medialist/resource/list',
-                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
+                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
             )['data']
             yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
             query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
@@ -1407,7 +1574,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
                 'tid': ('tid', {int_or_none}),
                 'sort_field': ('sortFiled', {int_or_none}),
                 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
-            })
+            }),
         }
         metadata = {
             'id': f'{query["type"]}_{query["biz_id"]}',
@@ -1415,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
                 'title': ('title', {str}),
                 'uploader': ('upper', 'name', {str}),
                 'uploader_id': ('upper', 'mid', {str_or_none}),
-                'timestamp': ('ctime', {int_or_none}),
+                'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
                 'thumbnail': ('cover', {url_or_none}),
             })),
         }
@@ -1430,26 +1597,26 @@ class BilibiliCategoryIE(InfoExtractor):
         'url': 'https://www.bilibili.com/v/kichiku/mad',
         'info_dict': {
             'id': 'kichiku: mad',
-            'title': 'kichiku: mad'
+            'title': 'kichiku: mad',
         },
         'playlist_mincount': 45,
         'params': {
-            'playlistend': 45
-        }
+            'playlistend': 45,
+        },
     }]
 
     def _fetch_page(self, api_url, num_pages, query, page_num):
         parsed_json = self._download_json(
             api_url, query, query={'Search_key': query, 'pn': page_num},
-            note='Extracting results from page %s of %s' % (page_num, num_pages))
+            note=f'Extracting results from page {page_num} of {num_pages}')
 
         video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
         if not video_list:
-            raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
+            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
 
         for video in video_list:
             yield self.url_result(
-                'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
+                'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
 
     def _entries(self, category, subcategory, query):
         # map of categories : subcategories : RIDs
@@ -1459,7 +1626,7 @@ class BilibiliCategoryIE(InfoExtractor):
                 'manual_vocaloid': 126,
                 'guide': 22,
                 'theatre': 216,
-                'course': 127
+                'course': 127,
             },
         }
 
@@ -1485,7 +1652,7 @@ class BilibiliCategoryIE(InfoExtractor):
 
     def _real_extract(self, url):
         category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
-        query = '%s: %s' % (category, subcategory)
+        query = f'{category}: {subcategory}'
 
         return self.playlist_result(self._entries(category, subcategory, query), query, query)
 
@@ -1588,7 +1755,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
         formats = [{
             'url': play_data['cdns'][0],
             'filesize': int_or_none(play_data.get('size')),
-            'vcodec': 'none'
+            'vcodec': 'none',
         }]
 
         for a_format in formats:
@@ -1606,7 +1773,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
             subtitles = {
                 'origin': [{
                     'url': lyric,
-                }]
+                }],
             }
 
         return {
@@ -1674,7 +1841,7 @@ class BiliBiliPlayerIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         return self.url_result(
-            'http://www.bilibili.tv/video/av%s/' % video_id,
+            f'http://www.bilibili.tv/video/av{video_id}/',
             ie=BiliBiliIE.ie_key(), video_id=video_id)
 
 
@@ -1702,11 +1869,10 @@ class BiliIntlBaseIE(InfoExtractor):
         return json.get('data')
 
     def json2srt(self, json):
-        data = '\n\n'.join(
+        return '\n\n'.join(
             f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
             for i, line in enumerate(traverse_obj(json, (
                 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
-        return data
 
     def _get_subtitles(self, *, ep_id=None, aid=None):
         sub_json = self._call_api(
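The tightened `json2srt` above maps each caption line to a numbered SRT block. A self-contained illustration, with `timecode` standing in for `yt_dlp.utils.srt_subtitles_timecode`:

def timecode(seconds):
    msec = int(seconds * 1000)
    hh, remainder = divmod(msec, 3600000)
    mm, remainder = divmod(remainder, 60000)
    ss, ms = divmod(remainder, 1000)
    return f'{hh:02d}:{mm:02d}:{ss:02d},{ms:03d}'

def json2srt(body):
    # body: [{'from': 0.0, 'to': 1.5, 'content': 'Hi'}, ...]
    return '\n\n'.join(
        f'{i + 1}\n{timecode(line["from"])} --> {timecode(line["to"])}\n{line["content"]}'
        for i, line in enumerate(l for l in body if l.get('content')))

# json2srt([{'from': 0.0, 'to': 1.5, 'content': 'Hi'}])
# -> '1\n00:00:00,000 --> 00:00:01,500\nHi'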
@@ -1808,14 +1974,15 @@ class BiliIntlBaseIE(InfoExtractor):
             note='Downloading login key', errnote='Unable to download login key')['data']
 
         public_key = Cryptodome.RSA.importKey(key_data['key'])
-        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
+        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
         login_post = self._download_json(
-            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
+            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
+            data=urlencode_postdata({
                 'username': username,
                 'password': base64.b64encode(password_hash).decode('ascii'),
                 'keep_me': 'true',
                 's_locale': 'en_US',
-                'isTrusted': 'true'
+                'isTrusted': 'true',
             }), note='Logging in', errnote='Unable to log in')
         if login_post.get('code'):
             if login_post.get('message'):
@@ -1842,17 +2009,17 @@ class BiliIntlIE(BiliIntlBaseIE):
             'chapters': [{
                 'start_time': 0,
                 'end_time': 76.242,
-                'title': ''
+                'title': '',
             }, {
                 'start_time': 76.242,
                 'end_time': 161.161,
-                'title': 'Intro'
+                'title': 'Intro',
             }, {
                 'start_time': 1325.742,
                 'end_time': 1403.903,
-                'title': 'Outro'
+                'title': 'Outro',
             }],
-        }
+        },
     }, {
         # Non-Bstation page
         'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
@@ -1869,17 +2036,17 @@ class BiliIntlIE(BiliIntlBaseIE):
             'chapters': [{
                 'start_time': 0,
                 'end_time': 88.0,
-                'title': ''
+                'title': '',
             }, {
                 'start_time': 88.0,
                 'end_time': 156.0,
-                'title': 'Intro'
+                'title': 'Intro',
             }, {
                 'start_time': 1173.0,
                 'end_time': 1259.535,
-                'title': 'Outro'
+                'title': 'Outro',
             }],
-        }
+        },
     }, {
         # Subtitle with empty content
         'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
@@ -1890,7 +2057,7 @@ class BiliIntlIE(BiliIntlBaseIE):
             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
             'episode_number': 140,
         },
-        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
+        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
     }, {
         # episode comment extraction
         'url': 'https://www.bilibili.tv/en/play/34580/340317',
@@ -1908,20 +2075,20 @@ class BiliIntlIE(BiliIntlBaseIE):
             'chapters': [{
                 'start_time': 0,
                 'end_time': 61.0,
-                'title': ''
+                'title': '',
             }, {
                 'start_time': 61.0,
                 'end_time': 134.0,
-                'title': 'Intro'
+                'title': 'Intro',
             }, {
                 'start_time': 1290.0,
                 'end_time': 1379.0,
-                'title': 'Outro'
+                'title': 'Outro',
             }],
         },
         'params': {
-            'getcomments': True
-        }
+            'getcomments': True,
+        },
     }, {
         # user generated content comment extraction
         'url': 'https://www.bilibili.tv/en/video/2045730385',
@@ -1936,8 +2103,8 @@ class BiliIntlIE(BiliIntlBaseIE):
             'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
         },
         'params': {
-            'getcomments': True
-        }
+            'getcomments': True,
+        },
     }, {
         # episode id without intro and outro
         'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
@@ -1992,7 +2159,7 @@ class BiliIntlIE(BiliIntlBaseIE):
             # Non-Bstation layout, read through episode list
             season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
             video_data = traverse_obj(season_json, (
-                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
+                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
             ), expected_type=dict, get_all=False)
 
         # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
@@ -2024,7 +2191,7 @@ class BiliIntlIE(BiliIntlBaseIE):
                     'id': replies.get('rpid'),
                     'like_count': int_or_none(replies.get('like_count')),
                     'parent': replies.get('parent'),
-                    'timestamp': unified_timestamp(replies.get('ctime_text'))
+                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                 }
 
         if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
@@ -2077,11 +2244,11 @@ class BiliIntlIE(BiliIntlBaseIE):
             chapters = [{
                 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
-                'title': 'Intro'
+                'title': 'Intro',
             }, {
                 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
-                'title': 'Outro'
+                'title': 'Outro',
             }]
 
         return {
@@ -2137,12 +2304,13 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
             episode_id = str(episode['episode_id'])
             yield self.url_result(smuggle_url(
                 BiliIntlIE._make_url(episode_id, series_id),
-                self._parse_video_metadata(episode)
+                self._parse_video_metadata(episode),
             ), BiliIntlIE, episode_id)
 
     def _real_extract(self, url):
         series_id = self._match_id(url)
-        series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
+        series_info = self._call_api(
+            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
         return self.playlist_result(
             self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
             categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
@@ -2156,19 +2324,19 @@ class BiliLiveIE(InfoExtractor):
         'url': 'https://live.bilibili.com/196',
         'info_dict': {
             'id': '33989',
-            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
+            'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
             'ext': 'flv',
-            'title': "太空狼人杀联动,不被爆杀就算赢",
-            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
+            'title': '太空狼人杀联动,不被爆杀就算赢',
+            'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
             'timestamp': 1650802769,
         },
-        'skip': 'not live'
+        'skip': 'not live',
     }, {
         'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
-        'only_matching': True
+        'only_matching': True,
     }, {
         'url': 'https://live.bilibili.com/blanc/196',
-        'only_matching': True
+        'only_matching': True,
     }]
 
     _FORMATS = {
@@ -2209,7 +2377,7 @@ class BiliLiveIE(InfoExtractor):
             raise ExtractorError('Streamer is not live', expected=True)
 
         formats = []
-        for qn in self._FORMATS.keys():
+        for qn in self._FORMATS:
             stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                 'room_id': room_id,
                 'qn': qn,
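Before the patch moves on to bitchute.py, note the chapter construction in BiliIntlIE above: the OGV `skip` block carries millisecond offsets that are scaled to seconds via `float_or_none(..., 1000)`. A reduced sketch of that mapping, with `skip_to_chapters` as an illustrative helper name:

def skip_to_chapters(skip):
    # skip: {'opening_start_time': 76242, 'opening_end_time': 161161, ...} in ms
    def sec(key):
        value = skip.get(key)
        return value / 1000 if value is not None else None
    return [
        {'start_time': sec('opening_start_time'), 'end_time': sec('opening_end_time'), 'title': 'Intro'},
        {'start_time': sec('ending_start_time'), 'end_time': sec('ending_end_time'), 'title': 'Outro'},
    ]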
diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py
index 194bf1f..c83222e 100644
--- a/yt_dlp/extractor/bitchute.py
+++ b/yt_dlp/extractor/bitchute.py
@@ -24,7 +24,7 @@ from ..utils import (
 
 
 class BitChuteIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
     _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
     _TESTS = [{
         'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
@@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor):
             'upload_date': '20170103',
             'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
             'channel': 'BitChute',
-            'channel_url': 'https://www.bitchute.com/channel/bitchute/'
+            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
         },
     }, {
         # test case: video with different channel and uploader
@@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor):
             'upload_date': '20231106',
             'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
             'channel': 'Full Measure with Sharyl Attkisson',
-            'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
+            'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
         },
     }, {
         # video not downloadable in browser, but we can recover it
@@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor):
             'upload_date': '20181113',
             'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
             'channel': 'BitChute',
-            'channel_url': 'https://www.bitchute.com/channel/bitchute/'
+            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
         },
         'params': {'check_formats': None},
     }, {
@@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
     }, {
         'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
         'only_matching': True,
+    }, {
+        'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
+        'only_matching': True,
     }]
     _GEO_BYPASS = False
 
@@ -115,7 +118,7 @@ class BitChuteIE(InfoExtractor):
                 continue
             return {
                 'url': url,
-                'filesize': int_or_none(response.headers.get('Content-Length'))
+                'filesize': int_or_none(response.headers.get('Content-Length')),
             }
 
     def _raise_if_restricted(self, webpage):
@@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(
-            f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
+            f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
 
         self._raise_if_restricted(webpage)
         publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
@@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
 
 
 class BitChuteChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://www.bitchute.com/channel/bitchute/',
         'info_dict': {
             'id': 'bitchute',
             'title': 'BitChute',
-            'description': 'md5:5329fb3866125afa9446835594a9b138',
+            'description': 'md5:2134c37d64fc3a4846787c402956adac',
         },
         'playlist': [
             {
@@ -196,7 +199,7 @@ class BitChuteChannelIE(InfoExtractor):
                     'duration': 16,
                     'view_count': int,
                 },
-            }
+            },
         ],
         'params': {
             'skip_download': True,
@@ -209,7 +212,10 @@ class BitChuteChannelIE(InfoExtractor):
             'id': 'wV9Imujxasw9',
             'title': 'Bruce MacDonald and "The Light of Darkness"',
             'description': 'md5:747724ef404eebdfc04277714f81863e',
-        }
+        },
+    }, {
+        'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
+        'only_matching': True,
     }]
 
     _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
@@ -224,13 +230,13 @@ class BitChuteChannelIE(InfoExtractor):
             'container': 'playlist-video',
             'title': 'title',
             'description': 'description',
-        }
+        },
     }
 
     @staticmethod
     def _make_url(playlist_id, playlist_type):
-        return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
+        return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
 
     def _fetch_page(self, playlist_id, playlist_type, page_num):
         playlist_url = self._make_url(playlist_id, playlist_type)
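The `_VALID_URL`/`_make_url` changes above route all requests through the legacy `old.bitchute.com` frontend while still accepting both hostnames in input URLs. A quick check of the new pattern, with the regex copied from the diff:

import re

VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'

for url in ('https://www.bitchute.com/video/UGlrF9o9b-Q/',
            'https://old.bitchute.com/video/UGlrF9o9b-Q/'):
    assert re.match(VALID_URL, url).group('id') == 'UGlrF9o9b-Q'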
diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py
index 8f41c89..5358909 100644
--- a/yt_dlp/extractor/blackboardcollaborate.py
+++ b/yt_dlp/extractor/blackboardcollaborate.py
@@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor):
         region = mobj.group('region')
         video_id = mobj.group('id')
         info = self._download_json(
-            'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
+            f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
         duration = info.get('duration')
         title = info['name']
         upload_date = info.get('created')
diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py
index aa3d63e..71b237d 100644
--- a/yt_dlp/extractor/bleacherreport.py
+++ b/yt_dlp/extractor/bleacherreport.py
@@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor):
 
     def _real_extract(self, url):
         article_id = self._match_id(url)
-        article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
+        article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article']
 
         thumbnails = []
         primary_photo = article_data.get('primaryPhoto')
@@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor):
         if video:
             video_type = video['type']
             if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
-                info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
+                info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id'])
             elif video_type == 'youtube.com':
                 info['url'] = video['id']
             elif video_type == 'vine.co':
-                info['url'] = 'https://vine.co/v/%s' % video['id']
+                info['url'] = 'https://vine.co/v/{}'.format(video['id'])
             else:
                 info['url'] = video_type + video['id']
             return info
@@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE):
         },
         'expected_warnings': [
-            'Unable to download f4m manifest'
-        ]
+            'Unable to download f4m manifest',
+        ],
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
+        info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai')
         info['id'] = video_id
         return info
diff --git a/yt_dlp/extractor/blerp.py b/yt_dlp/extractor/blerp.py
index 4631ad2..f4f2248 100644
--- a/yt_dlp/extractor/blerp.py
+++ b/yt_dlp/extractor/blerp.py
@@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor):
             'uploader_id': '5fb81e51aa66ae000c395478',
             'ext': 'mp3',
             'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
-        }
+        },
     }, {
         'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
         'info_dict': {
@@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor):
             'uploader': '179617322678353920',
             'uploader_id': '5ba99cf71386730004552c42',
             'ext': 'mp3',
-            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
-        }
+            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'],
+        },
     }]
 
-    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
+    _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
     _GRAPHQL_QUERY = (
         '''query webBitePageGetBite($_id: MongoID!)
         {
             web {
@@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor):
             'operationName': self._GRAPHQL_OPERATIONNAME,
             'query': self._GRAPHQL_QUERY,
             'variables': {
-                '_id': audio_id
-            }
+                '_id': audio_id,
+            },
         }
 
         headers = {
-            'Content-Type': 'application/json'
+            'Content-Type': 'application/json',
         }
 
-        json_result = self._download_json('https://api.blerp.com/graphql',
-                                          audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
+        json_result = self._download_json(
+            'https://api.blerp.com/graphql', audio_id,
+            data=json.dumps(data).encode(), headers=headers)
 
         bite_json = json_result['data']['web']['biteById']
 
-        info_dict = {
+        return {
             'id': bite_json['_id'],
             'url': bite_json['audio']['mp3']['url'],
             'title': bite_json['title'],
             'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
             'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
             'ext': 'mp3',
-            'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
+            'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None),
        }
-
-        return info_dict
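The blerp hunk above also shows the general shape of its GraphQL call: a JSON POST carrying `operationName`, `query` and `variables`. A minimal standalone equivalent using only the standard library, with the endpoint and field names taken from the diff:

import json
import urllib.request

def fetch_bite(audio_id, query):
    payload = {
        'operationName': 'webBitePageGetBite',
        'query': query,  # the _GRAPHQL_QUERY document above
        'variables': {'_id': audio_id},
    }
    req = urllib.request.Request(
        'https://api.blerp.com/graphql',
        data=json.dumps(payload).encode(),  # .encode() defaults to UTF-8
        headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)['data']['web']['biteById']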
diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py
index ef0151d..1614b6f 100644
--- a/yt_dlp/extractor/blogger.py
+++ b/yt_dlp/extractor/blogger.py
@@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor):
             'ext': 'mp4',
             'thumbnail': r're:^https?://.*',
             'duration': 76.068,
-        }
+        },
     }]
 
     def _real_extract(self, url):
         token_id = self._match_id(url)
         webpage = self._download_webpage(url, token_id)
         data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data')
-        data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id)
+        data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id)
         streams = data['streams']
         formats = [{
             'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))),
diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py
index 792155e..ec6b7a8 100644
--- a/yt_dlp/extractor/bloomberg.py
+++ b/yt_dlp/extractor/bloomberg.py
@@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor):
         title = re.sub(': Video$', '', self._og_search_title(webpage))
 
         embed_info = self._download_json(
-            'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
+            f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id)
         formats = []
         for stream in embed_info['streams']:
             stream_url = stream.get('url')
diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py
index ca326f2..5fe937a 100644
--- a/yt_dlp/extractor/bokecc.py
+++ b/yt_dlp/extractor/bokecc.py
@@ -1,5 +1,6 @@
+import urllib.parse
+
 from .common import InfoExtractor
-from ..compat import compat_parse_qs
 from ..utils import ExtractorError
 
 
@@ -9,20 +10,18 @@ class BokeCCBaseIE(InfoExtractor):
             r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
             webpage, 'player params', group='query')
 
-        player_params = compat_parse_qs(player_params_str)
+        player_params = urllib.parse.parse_qs(player_params_str)
 
         info_xml = self._download_xml(
-            'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
+            'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format(
                 player_params['siteid'][0], player_params['vid'][0]), video_id)
 
-        formats = [{
+        return [{
             'format_id': format_id,
             'url': quality.find('./copy').attrib['playurl'],
             'quality': int(quality.attrib['value']),
         } for quality in info_xml.findall('./video/quality')]
 
-        return formats
-
 
 class BokeCCIE(BokeCCBaseIE):
     _IE_DESC = 'CC视频'
@@ -38,11 +37,11 @@ class BokeCCIE(BokeCCBaseIE):
     }]
 
     def _real_extract(self, url):
-        qs = compat_parse_qs(self._match_valid_url(url).group('query'))
+        qs = urllib.parse.parse_qs(self._match_valid_url(url).group('query'))
         if not qs.get('vid') or not qs.get('uid'):
             raise ExtractorError('Invalid URL', expected=True)
 
-        video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])
+        video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0])
 
         webpage = self._download_webpage(url, video_id)
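The bokecc change swaps the removed `compat_parse_qs` shim for `urllib.parse.parse_qs` directly; behaviour is unchanged, and every value still arrives as a list, hence the `[0]` indexing above. For instance:

import urllib.parse

params = urllib.parse.parse_qs('uid=ABC123&vid=XYZ789')
# parse_qs always returns lists of values per key
assert params['uid'][0] == 'ABC123'
assert '{}_{}'.format(params['uid'][0], params['vid'][0]) == 'ABC123_XYZ789'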
diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py
index bf95566..ab85477 100644
--- a/yt_dlp/extractor/bongacams.py
+++ b/yt_dlp/extractor/bongacams.py
@@ -1,5 +1,4 @@
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     int_or_none,
     try_get,
@@ -38,7 +37,7 @@ class BongaCamsIE(InfoExtractor):
         channel_id = mobj.group('id')
 
         amf = self._download_json(
-            'https://%s/tools/amf.php' % host, channel_id,
+            f'https://{host}/tools/amf.php', channel_id,
             data=urlencode_postdata((
                 ('method', 'getRoomData'),
                 ('args[]', channel_id),
@@ -48,14 +47,14 @@ class BongaCamsIE(InfoExtractor):
         server_url = amf['localData']['videoServerUrl']
 
         uploader_id = try_get(
-            amf, lambda x: x['performerData']['username'], compat_str) or channel_id
+            amf, lambda x: x['performerData']['username'], str) or channel_id
         uploader = try_get(
-            amf, lambda x: x['performerData']['displayName'], compat_str)
+            amf, lambda x: x['performerData']['displayName'], str)
         like_count = int_or_none(try_get(
             amf, lambda x: x['performerData']['loversCount']))
 
         formats = self._extract_m3u8_formats(
-            '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
+            f'{server_url}/hls/stream_{uploader_id}/playlist.m3u8',
             channel_id, 'mp4', m3u8_id='hls', live=True)
 
         return {
diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py
index 2675866..f5b8196 100644
--- a/yt_dlp/extractor/bostonglobe.py
+++ b/yt_dlp/extractor/bostonglobe.py
@@ -57,8 +57,7 @@ class BostonGlobeIE(InfoExtractor):
 
             if video_id and account_id and player_id and embed:
                 entries.append(
-                    'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
-                    % (account_id, player_id, embed, video_id))
+                    f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}')
 
         if len(entries) == 0:
             return self.url_result(url, 'Generic')
diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py
index 008c011..3547ad9 100644
--- a/yt_dlp/extractor/box.py
+++ b/yt_dlp/extractor/box.py
@@ -72,20 +72,20 @@ class BoxIE(InfoExtractor):
                 'BoxApi': 'shared_link=' + shared_link,
                 'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
             }, query={
-                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
+                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size',
             })
 
         title = f['name']
         query = {
             'access_token': access_token,
-            'shared_link': shared_link
+            'shared_link': shared_link,
         }
 
         formats = []
 
         for url_tmpl in traverse_obj(f, (
             'representations', 'entries', lambda _, v: v['representation'] == 'dash',
-            'content', 'url_template', {url_or_none}
+            'content', 'url_template', {url_or_none},
         )):
             manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query)
             fmts = self._extract_mpd_formats(manifest_url, file_id)
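In the box.py hunk above, each DASH representation exposes a `url_template` whose `{+asset_path}` placeholder is substituted before the access token and shared link are appended as query parameters. A reduced sketch of that URL assembly, with sample values invented and a simplified stand-in for `update_url_query` that assumes the template carries no existing query string:

import urllib.parse

def manifest_url(url_tmpl, access_token, shared_link):
    base = url_tmpl.replace('{+asset_path}', 'manifest.mpd')
    # simplified equivalent of yt_dlp.utils.update_url_query
    return base + '?' + urllib.parse.urlencode({
        'access_token': access_token,
        'shared_link': shared_link,
    })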
diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py
index da06cc3..efa6699 100644
--- a/yt_dlp/extractor/boxcast.py
+++ b/yt_dlp/extractor/boxcast.py
@@ -21,7 +21,7 @@ class BoxCastVideoIE(InfoExtractor):
             'release_date': '20221210',
             'uploader_id': 're8w0v8hohhvpqtbskpe',
             'uploader': 'Children\'s Health Defense',
-        }
+        },
    }, {
         'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
         'info_dict': {
@@ -30,8 +30,8 @@ class BoxCastVideoIE(InfoExtractor):
             'uploader_id': 'vctwevwntun3o0ikq7af',
             'uploader': 'Legacy Christian Church',
             'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
-            'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg'
-        }
+            'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg',
+        },
     }, {
         'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
         'info_dict': {
@@ -44,7 +44,7 @@ class BoxCastVideoIE(InfoExtractor):
             'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
             'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
             'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
-        }
+        },
     }]
     _WEBPAGE_TESTS = [{
         'url': 'https://childrenshealthdefense.eu/live-stream/',
@@ -57,7 +57,7 @@ class BoxCastVideoIE(InfoExtractor):
             'release_date': '20221210',
             'uploader_id': 're8w0v8hohhvpqtbskpe',
             'uploader': 'Children\'s Health Defense',
-        }
+        },
     }]
 
     def _real_extract(self, url):
diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py
index 6e1c63e..0568e06 100644
--- a/yt_dlp/extractor/br.py
+++ b/yt_dlp/extractor/br.py
@@ -61,7 +61,7 @@ class BRIE(InfoExtractor):
                 'title': 'Umweltbewusster Häuslebauer',
                 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
                 'duration': 116,
-            }
+            },
         },
         {
             'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
@@ -74,7 +74,7 @@ class BRIE(InfoExtractor):
                 'duration': 893,
                 'uploader': 'Eva Maria Steimle',
                 'upload_date': '20170208',
-            }
+            },
         },
     ]
 
@@ -142,7 +142,7 @@ class BRIE(InfoExtractor):
                 http_format_info = format_info.copy()
                 http_format_info.update({
                     'url': format_url,
-                    'format_id': 'http-%s' % asset_type,
+                    'format_id': f'http-{asset_type}',
                 })
                 formats.append(http_format_info)
             server_prefix = xpath_text(asset, 'serverPrefix')
@@ -151,7 +151,7 @@ class BRIE(InfoExtractor):
                 rtmp_format_info.update({
                     'url': server_prefix,
                     'play_path': xpath_text(asset, 'fileName'),
-                    'format_id': 'rtmp-%s' % asset_type,
+                    'format_id': f'rtmp-{asset_type}',
                 })
                 formats.append(rtmp_format_info)
         return formats
diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py
index 04b1dd8..df10299 100644
--- a/yt_dlp/extractor/brainpop.py
+++ b/yt_dlp/extractor/brainpop.py
@@ -52,8 +52,8 @@ class BrainPOPBaseIE(InfoExtractor):
             '%s': {},
             'ad_%s': {
                 'format_note': 'Audio description',
-                'source_preference': -2
-            }
+                'source_preference': -2,
+            },
         }
         for additional_key_format, additional_key_fields in additional_key_formats.items():
             for key_quality, key_index in enumerate(('high', 'low')):
@@ -62,7 +62,7 @@ class BrainPOPBaseIE(InfoExtractor):
                     formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
                         'quality': -1 - key_quality,
                         **additional_key_fields,
-                        **extra_fields
+                        **extra_fields,
                     }))
         return formats
 
@@ -72,7 +72,7 @@ class BrainPOPBaseIE(InfoExtractor):
             data=json.dumps({'username': username, 'password': password}).encode(),
             headers={
                 'Content-Type': 'application/json',
-                'Referer': self._ORIGIN
+                'Referer': self._ORIGIN,
             }, note='Logging in', errnote='Unable to log in', expected_status=400)
         status_code = int_or_none(login_res['status_code'])
         if status_code != 1505:
@@ -131,12 +131,12 @@ class BrainPOPIE(BrainPOPBaseIE):
         formats, subtitles = [], {}
         formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
             'language': movie_feature.get('language') or 'en',
-            'language_preference': 10
+            'language_preference': 10,
         }))
         for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
             formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
                 'language': lang,
-                'language_preference': -10
+                'language_preference': -10,
             }))
 
         # TODO: Do localization fields also have subtitles?
@@ -145,7 +145,7 @@ class BrainPOPIE(BrainPOPBaseIE):
                 r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
             if lang and url:
                 subtitles.setdefault(lang, []).append({
-                    'url': urljoin(self._CDN_URL, url)
+                    'url': urljoin(self._CDN_URL, url),
                 })
 
         return {
diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py
index 419fe8c..ec72f0d 100644
--- a/yt_dlp/extractor/bravotv.py
+++ b/yt_dlp/extractor/bravotv.py
@@ -185,5 +185,5 @@ class BravoTVIE(AdobePassIE):
             'episode_number': ('episodeNumber', {int_or_none}),
             'episode': 'episodeTitle',
             'series': 'show',
-        }))
+        })),
     }
diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py
index b5abb7f..fedf477 100644
--- a/yt_dlp/extractor/breitbart.py
+++ b/yt_dlp/extractor/breitbart.py
@@ -13,7 +13,7 @@ class BreitBartIE(InfoExtractor):
             'description': 'md5:bac35eb0256d1cb17f517f54c79404d5',
             'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg',
             'age_limit': 0,
-        }
+        },
     }, {
         'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/',
         'only_matching': True,
@@ -30,5 +30,5 @@ class BreitBartIE(InfoExtractor):
             'description': self._og_search_description(webpage),
             'thumbnail': self._og_search_thumbnail(webpage),
             'age_limit': self._rta_search(webpage),
-            'formats': formats
+            'formats': formats,
         }
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 4190e1a..2526f25 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -1,15 +1,12 @@
 import base64
 import re
 import struct
+import urllib.parse
 import xml.etree.ElementTree
 
 from .adobepass import AdobePassIE
 from .common import InfoExtractor
-from ..compat import (
-    compat_etree_fromstring,
-    compat_parse_qs,
-    compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
 from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
@@ -21,6 +18,7 @@ from ..utils import (
     fix_xml_ampersands,
     float_or_none,
     int_or_none,
+    join_nonempty,
     js_to_json,
     mimetype2ext,
     parse_iso8601,
@@ -142,7 +140,7 @@ class BrightcoveLegacyIE(InfoExtractor):
             # from http://www.un.org/chinese/News/story.asp?NewsID=27724
             'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
             'only_matching': True,  # Tested in GenericIE
-        }
+        },
     ]
 
     _WEBPAGE_TESTS = [{
@@ -315,7 +313,7 @@ class BrightcoveLegacyIE(InfoExtractor):
         object_str = fix_xml_ampersands(object_str)
 
         try:
-            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
+            object_doc = compat_etree_fromstring(object_str.encode())
         except xml.etree.ElementTree.ParseError:
             return
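A recurring cleanup in this patch (blogger, blerp, bilibili, and the brightcove hunk above) replaces `.encode('utf-8')` with a bare `.encode()`: `str.encode` has defaulted to UTF-8 throughout Python 3, so the two spellings are equivalent:

s = '云南方言'
assert s.encode() == s.encode('utf-8')  # 'utf-8' is already the default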
@@ -323,7 +321,7 @@ class BrightcoveLegacyIE(InfoExtractor):
         if fv_el is not None:
             flashvars = dict(
                 (k, v[0])
-                for k, v in compat_parse_qs(fv_el.attrib['value']).items())
+                for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items())
         else:
             flashvars = {}
 
@@ -340,32 +338,32 @@ class BrightcoveLegacyIE(InfoExtractor):
 
         params = {}
 
-        playerID = find_param('playerID') or find_param('playerId')
-        if playerID is None:
+        player_id = find_param('playerID') or find_param('playerId')
+        if player_id is None:
             raise ExtractorError('Cannot find player ID')
-        params['playerID'] = playerID
+        params['playerID'] = player_id
 
-        playerKey = find_param('playerKey')
+        player_key = find_param('playerKey')
         # Not all pages define this value
-        if playerKey is not None:
-            params['playerKey'] = playerKey
+        if player_key is not None:
+            params['playerKey'] = player_key
         # These fields hold the id of the video
-        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
-        if videoPlayer is not None:
-            if isinstance(videoPlayer, list):
-                videoPlayer = videoPlayer[0]
-            videoPlayer = videoPlayer.strip()
+        video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
+        if video_player is not None:
+            if isinstance(video_player, list):
+                video_player = video_player[0]
+            video_player = video_player.strip()
             # UUID is also possible for videoPlayer (e.g.
             # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
             # or http://www8.hp.com/cn/zh/home.html)
             if not (re.match(
                     r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
-                    videoPlayer) or videoPlayer.startswith('ref:')):
+                    video_player) or video_player.startswith('ref:')):
                 return None
-            params['@videoPlayer'] = videoPlayer
-            linkBase = find_param('linkBaseURL')
-            if linkBase is not None:
-                params['linkBaseURL'] = linkBase
+            params['@videoPlayer'] = video_player
+            link_base = find_param('linkBaseURL')
+            if link_base is not None:
+                params['linkBaseURL'] = link_base
             return cls._make_brightcove_url(params)
 
     @classmethod
@@ -389,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
     @classmethod
     def _make_brightcove_url(cls, params):
         return update_url_query(
-            'http://c.brightcove.com/services/viewer/htmlFederated', params)
+            'https://c.brightcove.com/services/viewer/htmlFederated', params)
 
     @classmethod
     def _extract_brightcove_url(cls, webpage):
@@ -448,13 +446,13 @@ class BrightcoveLegacyIE(InfoExtractor):
         url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
         mobj = self._match_valid_url(url)
         query_str = mobj.group('query')
-        query = compat_urlparse.parse_qs(query_str)
+        query = urllib.parse.parse_qs(query_str)
 
-        videoPlayer = query.get('@videoPlayer')
-        if videoPlayer:
+        video_player = query.get('@videoPlayer')
+        if video_player:
             # We set the original url as the default 'Referer' header
             referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
-            video_id = videoPlayer[0]
+            video_id = video_player[0]
             if 'playerID' not in query:
                 mobj = re.search(r'/bcpid(\d+)', url)
                 if mobj is not None:
@@ -473,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                 if referer:
                     headers['Referer'] = referer
                 player_page = self._download_webpage(
-                    'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+                    'https://link.brightcove.com/services/player/bcpid' + player_id[0],
                     video_id, headers=headers, fatal=False)
                 if player_page:
                     player_key = self._search_regex(
@@ -483,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
                     enc_pub_id = player_key.split(',')[1].replace('~', '=')
                     publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
                 if publisher_id:
-                    brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+                    brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
                     if referer:
                         brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
                     return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
@@ -541,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
             })
 
         def build_format_id(kind):
-            format_id = kind
-            if tbr:
-                format_id += '-%dk' % int(tbr)
-            if height:
-                format_id += '-%dp' % height
-            return format_id
+            return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
 
         if src or streaming_src:
             f.update({
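The rewritten `build_format_id` relies on `yt_dlp.utils.join_nonempty`, which drops falsy parts and joins the rest with `-` by default, so the stepwise `kind`/`tbr`/`height` concatenation collapses to one expression. Roughly, as a simplified stand-in:

def join_nonempty(*values, delim='-'):
    # simplified stand-in for yt_dlp.utils.join_nonempty
    return delim.join(str(v) for v in values if v)

assert join_nonempty('http', 128 and '128k', None and '720p') == 'http-128k'
assert join_nonempty('hls', None, 720 and '720p') == 'hls-720p'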
@@ -654,7 +647,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
     }, {
         # playlist stream
         'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
@@ -666,7 +659,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
     }, {
         'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
         'only_matching': True,
@@ -804,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
         # Look for iframe embeds [1]
         for _, url in re.findall(
                 r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
-            entries.append(url if url.startswith('http') else 'http:' + url)
+            entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
 
         # Look for