Diffstat:
-rw-r--r--  yt_dlp/YoutubeDL.py | 57
-rw-r--r--  yt_dlp/cookies.py | 11
-rw-r--r--  yt_dlp/extractor/_extractors.py | 1068
-rw-r--r--  yt_dlp/extractor/abc.py | 4
-rw-r--r--  yt_dlp/extractor/abematv.py | 5
-rw-r--r--  yt_dlp/extractor/acfun.py | 4
-rw-r--r--  yt_dlp/extractor/adn.py | 4
-rw-r--r--  yt_dlp/extractor/adobetv.py | 4
-rw-r--r--  yt_dlp/extractor/afreecatv.py | 68
-rw-r--r--  yt_dlp/extractor/airtv.py | 2
-rw-r--r--  yt_dlp/extractor/allstar.py | 1
-rw-r--r--  yt_dlp/extractor/alphaporno.py | 4
-rw-r--r--  yt_dlp/extractor/alura.py | 16
-rw-r--r--  yt_dlp/extractor/amara.py | 2
-rw-r--r--  yt_dlp/extractor/amp.py | 2
-rw-r--r--  yt_dlp/extractor/anchorfm.py | 2
-rw-r--r--  yt_dlp/extractor/angel.py | 2
-rw-r--r--  yt_dlp/extractor/appleconnect.py | 5
-rw-r--r--  yt_dlp/extractor/appletrailers.py | 2
-rw-r--r--  yt_dlp/extractor/arnes.py | 2
-rw-r--r--  yt_dlp/extractor/arte.py | 64
-rw-r--r--  yt_dlp/extractor/asobistage.py | 2
-rw-r--r--  yt_dlp/extractor/atvat.py | 2
-rw-r--r--  yt_dlp/extractor/awaan.py | 2
-rw-r--r--  yt_dlp/extractor/banbye.py | 4
-rw-r--r--  yt_dlp/extractor/bannedvideo.py | 6
-rw-r--r--  yt_dlp/extractor/bbc.py | 438
-rw-r--r--  yt_dlp/extractor/beeg.py | 1
-rw-r--r--  yt_dlp/extractor/bilibili.py | 26
-rw-r--r--  yt_dlp/extractor/bleacherreport.py | 2
-rw-r--r--  yt_dlp/extractor/blogger.py | 2
-rw-r--r--  yt_dlp/extractor/boosty.py | 20
-rw-r--r--  yt_dlp/extractor/bostonglobe.py | 1
-rw-r--r--  yt_dlp/extractor/boxcast.py | 6
-rw-r--r--  yt_dlp/extractor/brainpop.py | 2
-rw-r--r--  yt_dlp/extractor/brightcove.py | 4
-rw-r--r--  yt_dlp/extractor/brilliantpala.py | 13
-rw-r--r--  yt_dlp/extractor/cableav.py | 32
-rw-r--r--  yt_dlp/extractor/caffeinetv.py | 74
-rw-r--r--  yt_dlp/extractor/canalalpha.py | 35
-rw-r--r--  yt_dlp/extractor/cbc.py | 24
-rw-r--r--  yt_dlp/extractor/cbs.py | 6
-rw-r--r--  yt_dlp/extractor/cda.py | 62
-rw-r--r--  yt_dlp/extractor/ceskatelevize.py | 2
-rw-r--r--  yt_dlp/extractor/cinetecamilano.py | 1
-rw-r--r--  yt_dlp/extractor/clippit.py | 4
-rw-r--r--  yt_dlp/extractor/common.py | 87
-rw-r--r--  yt_dlp/extractor/commonmistakes.py | 16
-rw-r--r--  yt_dlp/extractor/corus.py | 2
-rw-r--r--  yt_dlp/extractor/crackle.py | 2
-rw-r--r--  yt_dlp/extractor/crunchyroll.py | 171
-rw-r--r--  yt_dlp/extractor/cspan.py | 6
-rw-r--r--  yt_dlp/extractor/ctsnews.py | 2
-rw-r--r--  yt_dlp/extractor/dailymail.py | 2
-rw-r--r--  yt_dlp/extractor/damtomo.py | 2
-rw-r--r--  yt_dlp/extractor/dangalplay.py | 197
-rw-r--r--  yt_dlp/extractor/democracynow.py | 4
-rw-r--r--  yt_dlp/extractor/digitalconcerthall.py | 1
-rw-r--r--  yt_dlp/extractor/discoverygo.py | 2
-rw-r--r--  yt_dlp/extractor/disney.py | 4
-rw-r--r--  yt_dlp/extractor/douyutv.py | 2
-rw-r--r--  yt_dlp/extractor/dplay.py | 45
-rw-r--r--  yt_dlp/extractor/drtuber.py | 2
-rw-r--r--  yt_dlp/extractor/duboku.py | 2
-rw-r--r--  yt_dlp/extractor/dvtv.py | 4
-rw-r--r--  yt_dlp/extractor/dw.py | 2
-rw-r--r--  yt_dlp/extractor/einthusan.py | 105
-rw-r--r--  yt_dlp/extractor/eplus.py | 32
-rw-r--r--  yt_dlp/extractor/ertgr.py | 4
-rw-r--r--  yt_dlp/extractor/europa.py | 22
-rw-r--r--  yt_dlp/extractor/euscreen.py | 3
-rw-r--r--  yt_dlp/extractor/eyedotv.py | 4
-rw-r--r--  yt_dlp/extractor/facebook.py | 2
-rw-r--r--  yt_dlp/extractor/fancode.py | 8
-rw-r--r--  yt_dlp/extractor/faz.py | 2
-rw-r--r--  yt_dlp/extractor/fczenit.py | 2
-rw-r--r--  yt_dlp/extractor/fifa.py | 1
-rw-r--r--  yt_dlp/extractor/filmon.py | 4
-rw-r--r--  yt_dlp/extractor/gab.py | 2
-rw-r--r--  yt_dlp/extractor/gamejolt.py | 2
-rw-r--r--  yt_dlp/extractor/gaskrank.py | 1
-rw-r--r--  yt_dlp/extractor/gbnews.py | 107
-rw-r--r--  yt_dlp/extractor/generic.py | 2
-rw-r--r--  yt_dlp/extractor/gettr.py | 2
-rw-r--r--  yt_dlp/extractor/gigya.py | 1
-rw-r--r--  yt_dlp/extractor/glomex.py | 2
-rw-r--r--  yt_dlp/extractor/go.py | 10
-rw-r--r--  yt_dlp/extractor/godresource.py | 79
-rw-r--r--  yt_dlp/extractor/gofile.py | 5
-rw-r--r--  yt_dlp/extractor/googledrive.py | 36
-rw-r--r--  yt_dlp/extractor/gotostage.py | 9
-rw-r--r--  yt_dlp/extractor/hbo.py | 4
-rw-r--r--  yt_dlp/extractor/hearthisat.py | 46
-rw-r--r--  yt_dlp/extractor/hketv.py | 2
-rw-r--r--  yt_dlp/extractor/hrti.py | 2
-rw-r--r--  yt_dlp/extractor/huya.py | 6
-rw-r--r--  yt_dlp/extractor/hytale.py | 5
-rw-r--r--  yt_dlp/extractor/ichinanalive.py | 2
-rw-r--r--  yt_dlp/extractor/infoq.py | 4
-rw-r--r--  yt_dlp/extractor/instagram.py | 5
-rw-r--r--  yt_dlp/extractor/iprima.py | 6
-rw-r--r--  yt_dlp/extractor/iqiyi.py | 10
-rw-r--r--  yt_dlp/extractor/itprotv.py | 3
-rw-r--r--  yt_dlp/extractor/itv.py | 9
-rw-r--r--  yt_dlp/extractor/iwara.py | 4
-rw-r--r--  yt_dlp/extractor/jable.py | 103
-rw-r--r--  yt_dlp/extractor/jamendo.py | 2
-rw-r--r--  yt_dlp/extractor/japandiet.py | 4
-rw-r--r--  yt_dlp/extractor/jiocinema.py | 403
-rw-r--r--  yt_dlp/extractor/jiosaavn.py | 106
-rw-r--r--  yt_dlp/extractor/jove.py | 5
-rw-r--r--  yt_dlp/extractor/jstream.py | 2
-rw-r--r--  yt_dlp/extractor/kakao.py | 2
-rw-r--r--  yt_dlp/extractor/kaltura.py | 8
-rw-r--r--  yt_dlp/extractor/kankanews.py | 4
-rw-r--r--  yt_dlp/extractor/kuwo.py | 4
-rw-r--r--  yt_dlp/extractor/lci.py | 27
-rw-r--r--  yt_dlp/extractor/lcp.py | 2
-rw-r--r--  yt_dlp/extractor/lecture2go.py | 2
-rw-r--r--  yt_dlp/extractor/lecturio.py | 2
-rw-r--r--  yt_dlp/extractor/leeco.py | 2
-rw-r--r--  yt_dlp/extractor/libraryofcongress.py | 1
-rw-r--r--  yt_dlp/extractor/lifenews.py | 2
-rw-r--r--  yt_dlp/extractor/limelight.py | 2
-rw-r--r--  yt_dlp/extractor/linkedin.py | 2
-rw-r--r--  yt_dlp/extractor/mainstreaming.py | 3
-rw-r--r--  yt_dlp/extractor/manoto.py | 7
-rw-r--r--  yt_dlp/extractor/medaltv.py | 2
-rw-r--r--  yt_dlp/extractor/mediaklikk.py | 7
-rw-r--r--  yt_dlp/extractor/mediaset.py | 4
-rw-r--r--  yt_dlp/extractor/mediasite.py | 5
-rw-r--r--  yt_dlp/extractor/microsoftstream.py | 2
-rw-r--r--  yt_dlp/extractor/mildom.py | 4
-rw-r--r--  yt_dlp/extractor/mit.py | 4
-rw-r--r--  yt_dlp/extractor/mixch.py | 41
-rw-r--r--  yt_dlp/extractor/monstercat.py | 2
-rw-r--r--  yt_dlp/extractor/moviepilot.py | 8
-rw-r--r--  yt_dlp/extractor/movingimage.py | 2
-rw-r--r--  yt_dlp/extractor/msn.py | 2
-rw-r--r--  yt_dlp/extractor/n1.py | 2
-rw-r--r--  yt_dlp/extractor/naver.py | 2
-rw-r--r--  yt_dlp/extractor/nba.py | 2
-rw-r--r--  yt_dlp/extractor/nbc.py | 2
-rw-r--r--  yt_dlp/extractor/ndr.py | 2
-rw-r--r--  yt_dlp/extractor/neteasemusic.py | 3
-rw-r--r--  yt_dlp/extractor/nfb.py | 27
-rw-r--r--  yt_dlp/extractor/nfhsnetwork.py | 8
-rw-r--r--  yt_dlp/extractor/nhl.py | 2
-rw-r--r--  yt_dlp/extractor/ninenews.py | 2
-rw-r--r--  yt_dlp/extractor/ninenow.py | 2
-rw-r--r--  yt_dlp/extractor/nitter.py | 9
-rw-r--r--  yt_dlp/extractor/nobelprize.py | 6
-rw-r--r--  yt_dlp/extractor/noz.py | 6
-rw-r--r--  yt_dlp/extractor/nts.py | 76
-rw-r--r--  yt_dlp/extractor/nuevo.py | 6
-rw-r--r--  yt_dlp/extractor/nuvid.py | 2
-rw-r--r--  yt_dlp/extractor/nzherald.py | 5
-rw-r--r--  yt_dlp/extractor/odkmedia.py | 2
-rw-r--r--  yt_dlp/extractor/olympics.py | 5
-rw-r--r--  yt_dlp/extractor/onenewsnz.py | 6
-rw-r--r--  yt_dlp/extractor/onet.py | 4
-rw-r--r--  yt_dlp/extractor/opencast.py | 2
-rw-r--r--  yt_dlp/extractor/openrec.py | 2
-rw-r--r--  yt_dlp/extractor/ora.py | 1
-rw-r--r--  yt_dlp/extractor/orf.py | 225
-rw-r--r--  yt_dlp/extractor/packtpub.py | 3
-rw-r--r--  yt_dlp/extractor/panopto.py | 10
-rw-r--r--  yt_dlp/extractor/paramountplus.py | 2
-rw-r--r--  yt_dlp/extractor/patreon.py | 181
-rw-r--r--  yt_dlp/extractor/pbs.py | 4
-rw-r--r--  yt_dlp/extractor/pearvideo.py | 2
-rw-r--r--  yt_dlp/extractor/peertube.py | 2
-rw-r--r--  yt_dlp/extractor/piapro.py | 38
-rw-r--r--  yt_dlp/extractor/piksel.py | 2
-rw-r--r--  yt_dlp/extractor/pladform.py | 4
-rw-r--r--  yt_dlp/extractor/platzi.py | 2
-rw-r--r--  yt_dlp/extractor/playtvak.py | 2
-rw-r--r--  yt_dlp/extractor/pluralsight.py | 2
-rw-r--r--  yt_dlp/extractor/polsatgo.py | 2
-rw-r--r--  yt_dlp/extractor/porn91.py | 95
-rw-r--r--  yt_dlp/extractor/pornflip.py | 6
-rw-r--r--  yt_dlp/extractor/pornhub.py | 2
-rw-r--r--  yt_dlp/extractor/pornovoisines.py | 2
-rw-r--r--  yt_dlp/extractor/prx.py | 11
-rw-r--r--  yt_dlp/extractor/puhutv.py | 2
-rw-r--r--  yt_dlp/extractor/qingting.py | 1
-rw-r--r--  yt_dlp/extractor/qqmusic.py | 2
-rw-r--r--  yt_dlp/extractor/radiocanada.py | 2
-rw-r--r--  yt_dlp/extractor/radiocomercial.py | 2
-rw-r--r--  yt_dlp/extractor/radiozet.py | 2
-rw-r--r--  yt_dlp/extractor/radlive.py | 4
-rw-r--r--  yt_dlp/extractor/rai.py | 4
-rw-r--r--  yt_dlp/extractor/rbgtum.py | 2
-rw-r--r--  yt_dlp/extractor/rcti.py | 4
-rw-r--r--  yt_dlp/extractor/rds.py | 4
-rw-r--r--  yt_dlp/extractor/redbulltv.py | 2
-rw-r--r--  yt_dlp/extractor/reddit.py | 63
-rw-r--r--  yt_dlp/extractor/redgifs.py | 2
-rw-r--r--  yt_dlp/extractor/redtube.py | 2
-rw-r--r--  yt_dlp/extractor/reuters.py | 2
-rw-r--r--  yt_dlp/extractor/rmcdecouverte.py | 2
-rw-r--r--  yt_dlp/extractor/rte.py | 2
-rw-r--r--  yt_dlp/extractor/rtp.py | 9
-rw-r--r--  yt_dlp/extractor/rtvcplay.py | 7
-rw-r--r--  yt_dlp/extractor/rtvs.py | 1
-rw-r--r--  yt_dlp/extractor/rutube.py | 2
-rw-r--r--  yt_dlp/extractor/rutv.py | 6
-rw-r--r--  yt_dlp/extractor/ruutu.py | 2
-rw-r--r--  yt_dlp/extractor/safari.py | 1
-rw-r--r--  yt_dlp/extractor/scrippsnetworks.py | 4
-rw-r--r--  yt_dlp/extractor/scte.py | 2
-rw-r--r--  yt_dlp/extractor/sendtonews.py | 6
-rw-r--r--  yt_dlp/extractor/seznamzpravy.py | 2
-rw-r--r--  yt_dlp/extractor/shahid.py | 2
-rw-r--r--  yt_dlp/extractor/shemaroome.py | 2
-rw-r--r--  yt_dlp/extractor/sixplay.py | 2
-rw-r--r--  yt_dlp/extractor/skynewsarabia.py | 2
-rw-r--r--  yt_dlp/extractor/sohu.py | 8
-rw-r--r--  yt_dlp/extractor/soundcloud.py | 108
-rw-r--r--  yt_dlp/extractor/sovietscloset.py | 5
-rw-r--r--  yt_dlp/extractor/spankbang.py | 2
-rw-r--r--  yt_dlp/extractor/springboardplatform.py | 6
-rw-r--r--  yt_dlp/extractor/stacommu.py | 10
-rw-r--r--  yt_dlp/extractor/startv.py | 4
-rw-r--r--  yt_dlp/extractor/stitcher.py | 2
-rw-r--r--  yt_dlp/extractor/storyfire.py | 2
-rw-r--r--  yt_dlp/extractor/streamable.py | 2
-rw-r--r--  yt_dlp/extractor/stripchat.py | 2
-rw-r--r--  yt_dlp/extractor/stv.py | 2
-rw-r--r--  yt_dlp/extractor/sunporno.py | 4
-rw-r--r--  yt_dlp/extractor/syfy.py | 2
-rw-r--r--  yt_dlp/extractor/taptap.py | 275
-rw-r--r--  yt_dlp/extractor/tbs.py | 2
-rw-r--r--  yt_dlp/extractor/teachable.py | 4
-rw-r--r--  yt_dlp/extractor/teachertube.py | 2
-rw-r--r--  yt_dlp/extractor/teamcoco.py | 2
-rw-r--r--  yt_dlp/extractor/teamtreehouse.py | 2
-rw-r--r--  yt_dlp/extractor/ted.py | 5
-rw-r--r--  yt_dlp/extractor/tele13.py | 2
-rw-r--r--  yt_dlp/extractor/tele5.py | 134
-rw-r--r--  yt_dlp/extractor/telewebion.py | 1
-rw-r--r--  yt_dlp/extractor/tempo.py | 2
-rw-r--r--  yt_dlp/extractor/tencent.py | 2
-rw-r--r--  yt_dlp/extractor/theguardian.py | 2
-rw-r--r--  yt_dlp/extractor/theintercept.py | 4
-rw-r--r--  yt_dlp/extractor/theplatform.py | 24
-rw-r--r--  yt_dlp/extractor/thisvid.py | 2
-rw-r--r--  yt_dlp/extractor/threeqsdn.py | 2
-rw-r--r--  yt_dlp/extractor/tiktok.py | 605
-rw-r--r--  yt_dlp/extractor/toypics.py | 3
-rw-r--r--  yt_dlp/extractor/triller.py | 2
-rw-r--r--  yt_dlp/extractor/trueid.py | 4
-rw-r--r--  yt_dlp/extractor/tumblr.py | 2
-rw-r--r--  yt_dlp/extractor/turner.py | 12
-rw-r--r--  yt_dlp/extractor/tv2.py | 4
-rw-r--r--  yt_dlp/extractor/tv2hu.py | 2
-rw-r--r--  yt_dlp/extractor/tv5mondeplus.py | 149
-rw-r--r--  yt_dlp/extractor/tva.py | 44
-rw-r--r--  yt_dlp/extractor/tvanouvelles.py | 2
-rw-r--r--  yt_dlp/extractor/tvn24.py | 2
-rw-r--r--  yt_dlp/extractor/tvp.py | 2
-rw-r--r--  yt_dlp/extractor/tvplay.py | 2
-rw-r--r--  yt_dlp/extractor/tvplayer.py | 2
-rw-r--r--  yt_dlp/extractor/tweakers.py | 2
-rw-r--r--  yt_dlp/extractor/twitter.py | 49
-rw-r--r--  yt_dlp/extractor/udn.py | 2
-rw-r--r--  yt_dlp/extractor/ukcolumn.py | 8
-rw-r--r--  yt_dlp/extractor/unsupported.py | 14
-rw-r--r--  yt_dlp/extractor/urplay.py | 4
-rw-r--r--  yt_dlp/extractor/usatoday.py | 2
-rw-r--r--  yt_dlp/extractor/ustream.py | 4
-rw-r--r--  yt_dlp/extractor/ustudio.py | 2
-rw-r--r--  yt_dlp/extractor/veo.py | 1
-rw-r--r--  yt_dlp/extractor/vesti.py | 2
-rw-r--r--  yt_dlp/extractor/vevo.py | 2
-rw-r--r--  yt_dlp/extractor/vice.py | 4
-rw-r--r--  yt_dlp/extractor/vidio.py | 2
-rw-r--r--  yt_dlp/extractor/vidlii.py | 2
-rw-r--r--  yt_dlp/extractor/vimeo.py | 10
-rw-r--r--  yt_dlp/extractor/viu.py | 6
-rw-r--r--  yt_dlp/extractor/vk.py | 20
-rw-r--r--  yt_dlp/extractor/voot.py | 212
-rw-r--r--  yt_dlp/extractor/walla.py | 2
-rw-r--r--  yt_dlp/extractor/washingtonpost.py | 1
-rw-r--r--  yt_dlp/extractor/wdr.py | 4
-rw-r--r--  yt_dlp/extractor/weibo.py | 2
-rw-r--r--  yt_dlp/extractor/whowatch.py | 4
-rw-r--r--  yt_dlp/extractor/wimtv.py | 2
-rw-r--r--  yt_dlp/extractor/wppilot.py | 10
-rw-r--r--  yt_dlp/extractor/wrestleuniverse.py | 18
-rw-r--r--  yt_dlp/extractor/wsj.py | 2
-rw-r--r--  yt_dlp/extractor/xfileshare.py | 198
-rw-r--r--  yt_dlp/extractor/xhamster.py | 2
-rw-r--r--  yt_dlp/extractor/xiaohongshu.py | 83
-rw-r--r--  yt_dlp/extractor/xnxx.py | 2
-rw-r--r--  yt_dlp/extractor/xstream.py | 4
-rw-r--r--  yt_dlp/extractor/xvideos.py | 39
-rw-r--r--  yt_dlp/extractor/xxxymovies.py | 2
-rw-r--r--  yt_dlp/extractor/yandexmusic.py | 2
-rw-r--r--  yt_dlp/extractor/yandexvideo.py | 10
-rw-r--r--  yt_dlp/extractor/youporn.py | 411
-rw-r--r--  yt_dlp/extractor/yourporn.py | 65
-rw-r--r--  yt_dlp/extractor/yourupload.py | 43
-rw-r--r--  yt_dlp/extractor/youtube.py | 244
-rw-r--r--  yt_dlp/extractor/zapiks.py | 4
-rw-r--r--  yt_dlp/extractor/zhihu.py | 2
-rw-r--r--  yt_dlp/extractor/zingmp3.py | 2
-rw-r--r--  yt_dlp/extractor/zype.py | 2
-rw-r--r--  yt_dlp/networking/_curlcffi.py | 24
-rw-r--r--  yt_dlp/networking/_requests.py | 18
-rw-r--r--  yt_dlp/networking/common.py | 10
-rw-r--r--  yt_dlp/options.py | 2
-rw-r--r--  yt_dlp/update.py | 4
-rw-r--r--  yt_dlp/utils/_utils.py | 38
-rw-r--r--  yt_dlp/version.py | 6
315 files changed, 4851 insertions(+), 3147 deletions(-)
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 9f730d0..2c6f695 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2136,6 +2136,11 @@ class YoutubeDL:
 
     def _check_formats(self, formats):
         for f in formats:
+            working = f.get('__working')
+            if working is not None:
+                if working:
+                    yield f
+                continue
             self.to_screen('[info] Testing format %s' % f['format_id'])
             path = self.get_output_path('temp')
             if not self._ensure_dir_exists(f'{path}/'):
@@ -2152,33 +2157,44 @@
                         os.remove(temp_file.name)
                     except OSError:
                         self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+            f['__working'] = success
             if success:
                 yield f
             else:
                 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
 
+    def _select_formats(self, formats, selector):
+        return list(selector({
+            'formats': formats,
+            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
+            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
+                                   or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
+        }))
+
     def _default_format_spec(self, info_dict, download=True):
+        download = download and not self.params.get('simulate')
+        prefer_best = download and (
+            self.params['outtmpl']['default'] == '-'
+            or info_dict.get('is_live') and not self.params.get('live_from_start'))
+
         def can_merge():
             merger = FFmpegMergerPP(self)
             return merger.available and merger.can_merge()
 
-        prefer_best = (
-            not self.params.get('simulate')
-            and download
-            and (
-                not can_merge()
-                or info_dict.get('is_live') and not self.params.get('live_from_start')
-                or self.params['outtmpl']['default'] == '-'))
-        compat = (
-            prefer_best
-            or self.params.get('allow_multiple_audio_streams', False)
-            or 'format-spec' in self.params['compat_opts'])
-
-        return (
-            'best/bestvideo+bestaudio' if prefer_best
-            else 'bestvideo*+bestaudio/best' if not compat
-            else 'bestvideo+bestaudio/best')
+        if not prefer_best and download and not can_merge():
+            prefer_best = True
+            formats = self._get_formats(info_dict)
+            evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
+            if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
+                self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
+                                    'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
+
+        compat = (self.params.get('allow_multiple_audio_streams')
+                  or 'format-spec' in self.params['compat_opts'])
+
+        return ('best/bestvideo+bestaudio' if prefer_best
+                else 'bestvideo+bestaudio/best' if compat
+                else 'bestvideo*+bestaudio/best')
 
     def build_format_selector(self, format_spec):
         def syntax_error(note, start):
@@ -2928,12 +2944,7 @@
                 self.write_debug(f'Default format spec: {req_format}')
                 format_selector = self.build_format_selector(req_format)
 
-            formats_to_download = list(format_selector({
-                'formats': formats,
-                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
-                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
-                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
-            }))
+            formats_to_download = self._select_formats(formats, format_selector)
             if interactive_format_selection and not formats_to_download:
                 self.report_error('Requested format is not available', tb=False, is_error=False)
                 continue
@@ -3060,7 +3071,7 @@
                 f = formats[-1]
                 self.report_warning(
                     'No subtitle format found matching "%s" for language %s, '
-                    'using %s' % (formats_query, lang, f['ext']))
+                    'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
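
A note on the YoutubeDL.py hunks above: the new '__working' key memoizes the result of a format test directly on the format dict, so that evaluating a selector more than once (across interactive --format prompts, or when the new _default_format_spec probes both 'b/bv+ba' and 'bv*+ba/b' to decide whether to warn about a missing ffmpeg) does not re-download test data for formats that were already checked. Below is a minimal standalone sketch of the same memoization pattern; the probe callable stands in for yt-dlp's real download test and is an assumption of this sketch, not upstream code.

def check_formats(formats, probe):
    # Yield the formats that pass `probe`, caching each verdict in-place
    # under the private '__working' key so later passes skip the test.
    for f in formats:
        working = f.get('__working')
        if working is not None:  # already tested in an earlier pass
            if working:
                yield f
            continue
        f['__working'] = success = probe(f)  # remember the verdict
        if success:
            yield f

calls = []
def probe(f):
    calls.append(f['format_id'])
    return f['format_id'] != '22'

formats = [{'format_id': '137'}, {'format_id': '22'}]
assert [f['format_id'] for f in check_formats(formats, probe)] == ['137']
assert [f['format_id'] for f in check_formats(formats, probe)] == ['137']
assert calls == ['137', '22']  # each format probed once, then served from cache

Caching on the dict itself, rather than in a separate map, keeps the verdict attached to the format even when the list is re-filtered or re-sorted between passes.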
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 7b8d215..815897d 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -46,7 +46,7 @@ from .utils import (
 from .utils._utils import _YDLLogger
 from .utils.networking import normalize_url
 
-CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
+CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
 SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
 
@@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
             'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
             'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
             'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
+            'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
         }[browser_name]
 
     elif sys.platform == 'darwin':
@@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
             'edge': os.path.join(appdata, 'Microsoft Edge'),
             'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
             'vivaldi': os.path.join(appdata, 'Vivaldi'),
+            'whale': os.path.join(appdata, 'Naver/Whale'),
         }[browser_name]
 
     else:
@@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
             'edge': os.path.join(config, 'microsoft-edge'),
             'opera': os.path.join(config, 'opera'),
             'vivaldi': os.path.join(config, 'vivaldi'),
+            'whale': os.path.join(config, 'naver-whale'),
         }[browser_name]
 
     # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
@@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
         'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
        'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
        'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
+        'whale': 'Whale',
    }[browser_name]
 
    browsers_without_profiles = {'opera'}
@@ -347,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
    if value is None:
        return is_encrypted, None
 
+    # In chrome, session cookies have expires_utc set to 0
+    # In our cookie-store, cookies that do not expire should have expires set to None
+    if not expires_utc:
+        expires_utc = None
+
    return is_encrypted, http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
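
A note on the last cookies.py hunk: Chromium marks session cookies by storing expires_utc == 0, while http.cookiejar represents "never expires" as expires=None; a Cookie built with expires=0 would instead report is_expired() as true and could be discarded from the jar. Below is a minimal sketch of the normalization, assuming the fields have already been read and decrypted from Chromium's Cookies database; make_cookie is an illustrative helper, not yt-dlp's actual function.

import http.cookiejar

def make_cookie(host_key, name, value, expires_utc, is_secure):
    # Chromium stores session cookies with expires_utc == 0, but
    # http.cookiejar expects expires=None for "does not expire",
    # so the falsy sentinel is normalized here.
    if not expires_utc:
        expires_utc = None
    return http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key),
        domain_initial_dot=host_key.startswith('.'),
        path='/', path_specified=True, secure=is_secure, expires=expires_utc,
        discard=False, comment=None, comment_url=None, rest={})

session = make_cookie('.example.com', 'sid', 'abc123', 0, True)
assert session.expires is None   # normalized: a session cookie, never expires
assert not session.is_expired()  # so the cookiejar will not discard it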
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 4203427..e9cd38a 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1,4 +1,5 @@
# flake8: noqa: F401
+# isort: off
from .youtube import ( # Youtube is moved to the top to improve performance
YoutubeIE,
@@ -24,6 +25,8 @@ from .youtube import ( # Youtube is moved to the top to improve performance
YoutubeConsentRedirectIE,
)
+# isort: on
+
from .abc import (
ABCIE,
ABCIViewIE,
@@ -43,27 +46,33 @@ from .abematv import (
)
from .academicearth import AcademicEarthCourseIE
from .acast import (
- ACastIE,
ACastChannelIE,
+ ACastIE,
+)
+from .acfun import (
+ AcFunBangumiIE,
+ AcFunVideoIE,
+)
+from .adn import (
+ ADNIE,
+ ADNSeasonIE,
)
-from .acfun import AcFunVideoIE, AcFunBangumiIE
-from .adn import ADNIE, ADNSeasonIE
from .adobeconnect import AdobeConnectIE
from .adobetv import (
+ AdobeTVChannelIE,
AdobeTVEmbedIE,
AdobeTVIE,
AdobeTVShowIE,
- AdobeTVChannelIE,
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
from .aenetworks import (
- AENetworksIE,
AENetworksCollectionIE,
+ AENetworksIE,
AENetworksShowIE,
- HistoryTopicIE,
- HistoryPlayerIE,
BiographyIE,
+ HistoryPlayerIE,
+ HistoryTopicIE,
)
from .aeonco import AeonCoIE
from .afreecatv import (
@@ -79,77 +88,85 @@ from .agora import (
)
from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
+from .aliexpress import AliExpressLiveIE
from .aljazeera import AlJazeeraIE
+from .allocine import AllocineIE
from .allstar import (
AllstarIE,
AllstarProfileIE,
)
from .alphaporno import AlphaPornoIE
+from .alsace20tv import (
+ Alsace20TVEmbedIE,
+ Alsace20TVIE,
+)
from .altcensored import (
- AltCensoredIE,
AltCensoredChannelIE,
+ AltCensoredIE,
)
from .alura import (
+ AluraCourseIE,
AluraIE,
- AluraCourseIE
)
from .amadeustv import AmadeusTVIE
from .amara import AmaraIE
-from .amcnetworks import AMCNetworksIE
from .amazon import (
- AmazonStoreIE,
AmazonReviewsIE,
+ AmazonStoreIE,
)
from .amazonminitv import (
AmazonMiniTVIE,
AmazonMiniTVSeasonIE,
AmazonMiniTVSeriesIE,
)
+from .amcnetworks import AMCNetworksIE
from .americastestkitchen import (
AmericasTestKitchenIE,
AmericasTestKitchenSeasonIE,
)
from .anchorfm import AnchorFMEpisodeIE
from .angel import AngelIE
+from .antenna import (
+ Ant1NewsGrArticleIE,
+ Ant1NewsGrEmbedIE,
+ AntennaGrWatchIE,
+)
from .anvato import AnvatoIE
from .aol import AolIE
-from .allocine import AllocineIE
-from .aliexpress import AliExpressLiveIE
-from .alsace20tv import (
- Alsace20TVIE,
- Alsace20TVEmbedIE,
-)
from .apa import APAIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
+from .applepodcasts import ApplePodcastsIE
from .appletrailers import (
AppleTrailersIE,
AppleTrailersSectionIE,
)
-from .applepodcasts import ApplePodcastsIE
from .archiveorg import (
ArchiveOrgIE,
YoutubeWebArchiveIE,
)
from .arcpublishing import ArcPublishingIE
-from .arkena import ArkenaIE
from .ard import (
+ ARDIE,
ARDBetaMediathekIE,
ARDMediathekCollectionIE,
- ARDIE,
)
+from .arkena import ArkenaIE
+from .arnes import ArnesIE
from .art19 import (
Art19IE,
Art19ShowIE,
)
from .arte import (
- ArteTVIE,
+ ArteTVCategoryIE,
ArteTVEmbedIE,
+ ArteTVIE,
ArteTVPlaylistIE,
- ArteTVCategoryIE,
)
-from .arnes import ArnesIE
-from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
+from .asobichannel import (
+ AsobiChannelIE,
+ AsobiChannelTagURLIE,
+)
from .asobistage import AsobiStageIE
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
@@ -160,57 +177,60 @@ from .audiodraft import (
AudiodraftCustomIE,
AudiodraftGenericIE,
)
-from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .audiomack import (
+ AudiomackAlbumIE,
+ AudiomackIE,
+)
from .audius import (
AudiusIE,
- AudiusTrackIE,
AudiusPlaylistIE,
AudiusProfileIE,
+ AudiusTrackIE,
)
from .awaan import (
AWAANIE,
- AWAANVideoIE,
AWAANLiveIE,
AWAANSeasonIE,
+ AWAANVideoIE,
)
from .axs import AxsIE
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
from .banbye import (
- BanByeIE,
BanByeChannelIE,
+ BanByeIE,
)
from .bandaichannel import BandaiChannelIE
from .bandcamp import (
- BandcampIE,
BandcampAlbumIE,
- BandcampWeeklyIE,
+ BandcampIE,
BandcampUserIE,
+ BandcampWeeklyIE,
)
from .bannedvideo import BannedVideoIE
from .bbc import (
- BBCCoUkIE,
+ BBCIE,
BBCCoUkArticleIE,
+ BBCCoUkIE,
BBCCoUkIPlayerEpisodesIE,
BBCCoUkIPlayerGroupIE,
BBCCoUkPlaylistIE,
- BBCIE,
)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .bellmedia import BellMediaIE
from .beatbump import (
- BeatBumpVideoIE,
BeatBumpPlaylistIE,
+ BeatBumpVideoIE,
)
from .beatport import BeatportIE
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
from .berufetv import BerufeTVIE
from .bet import BetIE
from .bfi import BFIPlayerIE
from .bfmtv import (
BFMTVIE,
- BFMTVLiveIE,
BFMTVArticleIE,
+ BFMTVLiveIE,
)
from .bibeltv import (
BibelTVLiveIE,
@@ -221,37 +241,37 @@ from .bigflix import BigflixIE
from .bigo import BigoIE
from .bild import BildIE
from .bilibili import (
- BiliBiliIE,
+ BilibiliAudioAlbumIE,
+ BilibiliAudioIE,
BiliBiliBangumiIE,
- BiliBiliBangumiSeasonIE,
BiliBiliBangumiMediaIE,
+ BiliBiliBangumiSeasonIE,
+ BilibiliCategoryIE,
BilibiliCheeseIE,
BilibiliCheeseSeasonIE,
- BiliBiliSearchIE,
- BilibiliCategoryIE,
- BilibiliAudioIE,
- BilibiliAudioAlbumIE,
- BiliBiliPlayerIE,
- BilibiliSpaceVideoIE,
- BilibiliSpaceAudioIE,
BilibiliCollectionListIE,
- BilibiliSeriesListIE,
BilibiliFavoritesListIE,
- BilibiliWatchlaterIE,
+ BiliBiliIE,
+ BiliBiliPlayerIE,
BilibiliPlaylistIE,
+ BiliBiliSearchIE,
+ BilibiliSeriesListIE,
+ BilibiliSpaceAudioIE,
+ BilibiliSpaceVideoIE,
+ BilibiliWatchlaterIE,
BiliIntlIE,
BiliIntlSeriesIE,
BiliLiveIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
- BitChuteIE,
BitChuteChannelIE,
+ BitChuteIE,
)
from .blackboardcollaborate import BlackboardCollaborateIE
from .bleacherreport import (
- BleacherReportIE,
BleacherReportCMSIE,
+ BleacherReportIE,
)
from .blerp import BlerpIE
from .blogger import BloggerIE
@@ -264,69 +284,69 @@ from .box import BoxIE
from .boxcast import BoxCastVideoIE
from .bpb import BpbIE
from .br import BRIE
-from .bravotv import BravoTVIE
from .brainpop import (
- BrainPOPIE,
- BrainPOPJrIE,
BrainPOPELLIE,
BrainPOPEspIE,
BrainPOPFrIE,
+ BrainPOPIE,
BrainPOPIlIE,
+ BrainPOPJrIE,
)
+from .bravotv import BravoTVIE
from .breitbart import BreitBartIE
from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .brilliantpala import (
- BrilliantpalaElearnIE,
BrilliantpalaClassesIE,
+ BrilliantpalaElearnIE,
)
-from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .bundestag import BundestagIE
+from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
-from .cableav import CableAVIE
+from .caffeinetv import CaffeineTVIE
from .callin import CallinIE
from .caltrans import CaltransIE
from .cam4 import CAM4IE
from .camdemy import (
+ CamdemyFolderIE,
CamdemyIE,
- CamdemyFolderIE
)
from .camfm import (
CamFMEpisodeIE,
- CamFMShowIE
+ CamFMShowIE,
)
from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
-from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
+from .canalplus import CanalplusIE
from .caracoltv import CaracolTvPlayIE
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
- CBCPlayerIE,
- CBCPlayerPlaylistIE,
CBCGemIE,
- CBCGemPlaylistIE,
CBCGemLiveIE,
+ CBCGemPlaylistIE,
+ CBCPlayerIE,
+ CBCPlayerPlaylistIE,
)
from .cbs import (
CBSIE,
ParamountPressExpressIE,
)
from .cbsnews import (
- CBSNewsEmbedIE,
- CBSNewsIE,
- CBSLocalIE,
CBSLocalArticleIE,
+ CBSLocalIE,
CBSLocalLiveIE,
+ CBSNewsEmbedIE,
+ CBSNewsIE,
CBSNewsLiveIE,
CBSNewsLiveVideoIE,
)
@@ -355,12 +375,12 @@ from .chzzk import (
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .cineverse import (
- CineverseIE,
CineverseDetailsIE,
+ CineverseIE,
)
from .ciscolive import (
- CiscoLiveSessionIE,
CiscoLiveSearchIE,
+ CiscoLiveSessionIE,
)
from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
@@ -373,21 +393,22 @@ from .cloudycdn import CloudyCDNIE
from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
-from .cnbc import (
- CNBCVideoIE,
-)
+from .cnbc import CNBCVideoIE
from .cnn import (
CNNIE,
- CNNBlogsIE,
CNNArticleIE,
+ CNNBlogsIE,
CNNIndonesiaIE,
)
-from .coub import CoubIE
from .comedycentral import (
ComedyCentralIE,
ComedyCentralTVIE,
)
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonmistakes import (
+ BlobIE,
+ CommonMistakesIE,
+ UnicodeBOMIE,
+)
from .commonprotocols import (
MmsIE,
RtmpIE,
@@ -396,44 +417,48 @@ from .commonprotocols import (
from .condenast import CondeNastIE
from .contv import CONtvIE
from .corus import CorusIE
+from .coub import CoubIE
+from .cozytv import CozyTVIE
from .cpac import (
CPACIE,
CPACPlaylistIE,
)
-from .cozytv import CozyTVIE
from .cracked import CrackedIE
from .crackle import CrackleIE
from .craftsy import CraftsyIE
from .crooksandliars import CrooksAndLiarsIE
from .crowdbunker import (
- CrowdBunkerIE,
CrowdBunkerChannelIE,
+ CrowdBunkerIE,
)
from .crtvg import CrtvgIE
from .crunchyroll import (
+ CrunchyrollArtistIE,
CrunchyrollBetaIE,
CrunchyrollBetaShowIE,
CrunchyrollMusicIE,
- CrunchyrollArtistIE,
)
-from .cspan import CSpanIE, CSpanCongressIE
+from .cspan import (
+ CSpanCongressIE,
+ CSpanIE,
+)
from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE
from .curiositystream import (
- CuriosityStreamIE,
CuriosityStreamCollectionsIE,
+ CuriosityStreamIE,
CuriosityStreamSeriesIE,
)
from .cwtv import CWTVIE
from .cybrary import (
+ CybraryCourseIE,
CybraryIE,
- CybraryCourseIE
)
from .dacast import (
- DacastVODIE,
DacastPlaylistIE,
+ DacastVODIE,
)
from .dailymail import DailyMailIE
from .dailymotion import (
@@ -450,9 +475,13 @@ from .damtomo import (
DamtomoRecordIE,
DamtomoVideoIE,
)
+from .dangalplay import (
+ DangalPlayIE,
+ DangalPlaySeasonIE,
+)
from .daum import (
- DaumIE,
DaumClipIE,
+ DaumIE,
DaumPlaylistIE,
DaumUserIE,
)
@@ -460,49 +489,69 @@ from .daystar import DaystarClipIE
from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import (
- DeezerPlaylistIE,
DeezerAlbumIE,
+ DeezerPlaylistIE,
)
from .democracynow import DemocracynowIE
from .detik import DetikEmbedIE
+from .deuxm import (
+ DeuxMIE,
+ DeuxMNewsIE,
+)
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .digitalconcerthall import DigitalConcertHallIE
+from .digiteka import DigitekaIE
+from .discogs import DiscogsReleasePlaylistIE
+from .discovery import DiscoveryIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
from .dlf import (
DLFIE,
DLFCorpusIE,
)
-from .dfb import DFBIE
-from .dhm import DHMIE
+from .dlive import (
+ DLiveStreamIE,
+ DLiveVODIE,
+)
from .douyutv import (
DouyuShowIE,
DouyuTVIE,
)
from .dplay import (
- DPlayIE,
- DiscoveryPlusIE,
- HGTVDeIE,
- GoDiscoveryIE,
- TravelChannelIE,
+ TLCIE,
+ AmHistoryChannelIE,
+ AnimalPlanetIE,
CookingChannelIE,
- HGTVUsaIE,
- FoodNetworkIE,
- InvestigationDiscoveryIE,
DestinationAmericaIE,
- AmHistoryChannelIE,
- ScienceChannelIE,
- DIYNetworkIE,
DiscoveryLifeIE,
- AnimalPlanetIE,
- TLCIE,
- MotorTrendIE,
- MotorTrendOnDemandIE,
- DiscoveryPlusIndiaIE,
DiscoveryNetworksDeIE,
+ DiscoveryPlusIE,
+ DiscoveryPlusIndiaIE,
+ DiscoveryPlusIndiaShowIE,
DiscoveryPlusItalyIE,
DiscoveryPlusItalyShowIE,
- DiscoveryPlusIndiaShowIE,
+ DIYNetworkIE,
+ DPlayIE,
+ FoodNetworkIE,
GlobalCyclingNetworkPlusIE,
+ GoDiscoveryIE,
+ HGTVDeIE,
+ HGTVUsaIE,
+ InvestigationDiscoveryIE,
+ MotorTrendIE,
+ MotorTrendOnDemandIE,
+ ScienceChannelIE,
+ TravelChannelIE,
)
-from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
+from .dreisat import DreiSatIE
+from .drooble import DroobleIE
+from .dropbox import DropboxIE
+from .dropout import (
+ DropoutIE,
+ DropoutSeasonIE,
+)
from .drtuber import DrTuberIE
from .drtv import (
DRTVIE,
@@ -511,32 +560,21 @@ from .drtv import (
DRTVSeriesIE,
)
from .dtube import DTubeIE
-from .dvtv import DVTVIE
from .duboku import (
DubokuIE,
- DubokuPlaylistIE
+ DubokuPlaylistIE,
)
from .dumpert import DumpertIE
-from .deuxm import (
- DeuxMIE,
- DeuxMNewsIE
-)
-from .digitalconcerthall import DigitalConcertHallIE
-from .discogs import DiscogsReleasePlaylistIE
-from .discovery import DiscoveryIE
-from .disney import DisneyIE
-from .dispeak import DigitallySpeakingIE
-from .dropbox import DropboxIE
-from .dropout import (
- DropoutSeasonIE,
- DropoutIE
-)
from .duoplay import DuoplayIE
+from .dvtv import DVTVIE
from .dw import (
DWIE,
DWArticleIE,
)
-from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
+from .eagleplatform import (
+ ClipYouEmbedIE,
+ EaglePlatformIE,
+)
from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE
from .egghead import (
@@ -544,7 +582,6 @@ from .egghead import (
EggheadLessonIE,
)
from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
@@ -561,8 +598,8 @@ from .epoch import EpochIE
from .eporner import EpornerIE
from .erocast import ErocastIE
from .eroprofile import (
- EroProfileIE,
EroProfileAlbumIE,
+ EroProfileIE,
)
from .err import ERRJupiterIE
from .ertgr import (
@@ -572,31 +609,33 @@ from .ertgr import (
)
from .espn import (
ESPNIE,
- WatchESPNIE,
ESPNArticleIE,
- FiveThirtyEightIE,
ESPNCricInfoIE,
+ FiveThirtyEightIE,
+ WatchESPNIE,
)
from .ettutv import EttuTvIE
-from .europa import EuropaIE, EuroParlWebstreamIE
+from .europa import (
+ EuropaIE,
+ EuroParlWebstreamIE,
+)
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
from .euscreen import EUScreenIE
from .expressen import ExpressenIE
from .eyedotv import EyedoTVIE
from .facebook import (
+ FacebookAdsIE,
FacebookIE,
FacebookPluginsVideoIE,
FacebookRedirectURLIE,
FacebookReelIE,
- FacebookAdsIE,
)
-from .fathom import FathomIE
from .fancode import (
+ FancodeLiveIE,
FancodeVodIE,
- FancodeLiveIE
)
-
+from .fathom import FathomIE
from .faz import FazIE
from .fc2 import (
FC2IE,
@@ -606,8 +645,8 @@ from .fc2 import (
from .fczenit import FczenitIE
from .fifa import FifaIE
from .filmon import (
- FilmOnIE,
FilmOnChannelIE,
+ FilmOnIE,
)
from .filmweb import FilmwebIE
from .firsttv import FirstTVIE
@@ -615,17 +654,17 @@ from .fivetv import FiveTVIE
from .flextv import FlexTVIE
from .flickr import FlickrIE
from .floatplane import (
- FloatplaneIE,
FloatplaneChannelIE,
+ FloatplaneIE,
)
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
from .fourtube import (
FourTubeIE,
- PornTubeIE,
- PornerBrosIE,
FuxIE,
+ PornerBrosIE,
+ PornTubeIE,
)
from .fox import FOXIE
from .fox9 import (
@@ -633,8 +672,8 @@ from .fox9 import (
FOX9NewsIE,
)
from .foxnews import (
- FoxNewsIE,
FoxNewsArticleIE,
+ FoxNewsIE,
FoxNewsVideoIE,
)
from .foxsports import FoxSportsIE
@@ -642,20 +681,20 @@ from .fptplay import FptplayIE
from .franceinter import FranceInterIE
from .francetv import (
FranceTVIE,
- FranceTVSiteIE,
FranceTVInfoIE,
+ FranceTVSiteIE,
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
-from .frontendmasters import (
- FrontendMastersIE,
- FrontendMastersLessonIE,
- FrontendMastersCourseIE
-)
from .freetv import (
FreeTvIE,
FreeTvMoviesIE,
)
+from .frontendmasters import (
+ FrontendMastersCourseIE,
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+)
from .fujitv import FujiTVFODPlus7IE
from .funimation import (
FunimationIE,
@@ -666,32 +705,37 @@ from .funk import FunkIE
from .funker530 import Funker530IE
from .fuyintv import FuyinTVIE
from .gab import (
- GabTVIE,
GabIE,
+ GabTVIE,
)
from .gaia import GaiaIE
from .gamejolt import (
- GameJoltIE,
- GameJoltUserIE,
+ GameJoltCommunityIE,
GameJoltGameIE,
GameJoltGameSoundtrackIE,
- GameJoltCommunityIE,
+ GameJoltIE,
GameJoltSearchIE,
+ GameJoltUserIE,
)
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gaskrank import GaskrankIE
from .gazeta import GazetaIE
+from .gbnews import GBNewsIE
from .gdcvault import GDCVaultIE
from .gedidigital import GediDigitalIE
from .generic import GenericIE
+from .genericembeds import (
+ HTML5MediaEmbedIE,
+ QuotedHTMLIE,
+)
from .genius import (
GeniusIE,
GeniusLyricsIE,
)
from .getcourseru import (
+ GetCourseRuIE,
GetCourseRuPlayerIE,
- GetCourseRuIE
)
from .gettr import (
GettrIE,
@@ -700,40 +744,45 @@ from .gettr import (
from .giantbomb import GiantBombIE
from .glide import GlideIE
from .globalplayer import (
+ GlobalPlayerAudioEpisodeIE,
+ GlobalPlayerAudioIE,
GlobalPlayerLiveIE,
GlobalPlayerLivePlaylistIE,
- GlobalPlayerAudioIE,
- GlobalPlayerAudioEpisodeIE,
- GlobalPlayerVideoIE
+ GlobalPlayerVideoIE,
)
from .globo import (
- GloboIE,
GloboArticleIE,
+ GloboIE,
+)
+from .glomex import (
+ GlomexEmbedIE,
+ GlomexIE,
)
from .gmanetwork import GMANetworkVideoIE
from .go import GoIE
+from .godresource import GodResourceIE
from .godtube import GodTubeIE
from .gofile import GofileIE
from .golem import GolemIE
from .goodgame import GoodGameIE
from .googledrive import (
- GoogleDriveIE,
GoogleDriveFolderIE,
+ GoogleDriveIE,
)
from .googlepodcasts import (
- GooglePodcastsIE,
GooglePodcastsFeedIE,
+ GooglePodcastsIE,
)
from .googlesearch import GoogleSearchIE
-from .gopro import GoProIE
from .goplay import GoPlayIE
+from .gopro import GoProIE
from .goshgay import GoshgayIE
from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE
from .gronkh import (
- GronkhIE,
GronkhFeedIE,
- GronkhVodsIE
+ GronkhIE,
+ GronkhVodsIE,
)
from .groupon import GrouponIE
from .harpodeon import HarpodeonIE
@@ -742,10 +791,10 @@ from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .hgtv import HGTVComShowIE
-from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitrecord import HitRecordIE
+from .hketv import HKETVIE
from .hollywoodreporter import (
HollywoodReporterIE,
HollywoodReporterPlaylistIE,
@@ -754,8 +803,8 @@ from .holodex import HolodexIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import (
HotStarIE,
- HotStarPrefixIE,
HotStarPlaylistIE,
+ HotStarPrefixIE,
HotStarSeasonIE,
HotStarSeriesIE,
)
@@ -766,34 +815,30 @@ from .hrti import (
HRTiPlaylistIE,
)
from .hse import (
- HSEShowIE,
HSEProductIE,
-)
-from .genericembeds import (
- HTML5MediaEmbedIE,
- QuotedHTMLIE,
+ HSEShowIE,
)
from .huajiao import HuajiaoIE
-from .huya import HuyaLiveIE
from .huffpost import HuffPostIE
from .hungama import (
+ HungamaAlbumPlaylistIE,
HungamaIE,
HungamaSongIE,
- HungamaAlbumPlaylistIE,
)
+from .huya import HuyaLiveIE
from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE
from .icareus import IcareusIE
from .ichinanalive import (
- IchinanaLiveIE,
IchinanaLiveClipIE,
+ IchinanaLiveIE,
)
from .idolplus import IdolPlusIE
from .ign import (
IGNIE,
- IGNVideoIE,
IGNArticleIE,
+ IGNVideoIE,
)
from .iheart import (
IHeartRadioIE,
@@ -803,12 +848,12 @@ from .ilpost import IlPostIE
from .iltalehti import IltalehtiIE
from .imdb import (
ImdbIE,
- ImdbListIE
+ ImdbListIE,
)
from .imgur import (
- ImgurIE,
ImgurAlbumIE,
ImgurGalleryIE,
+ ImgurIE,
)
from .ina import InaIE
from .inc import IncIE
@@ -817,20 +862,20 @@ from .infoq import InfoQIE
from .instagram import (
InstagramIE,
InstagramIOSIE,
- InstagramUserIE,
- InstagramTagIE,
InstagramStoryIE,
+ InstagramTagIE,
+ InstagramUserIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import (
+ IPrimaCNNIE,
IPrimaIE,
- IPrimaCNNIE
)
from .iqiyi import (
- IqiyiIE,
+ IqAlbumIE,
IqIE,
- IqAlbumIE
+ IqiyiIE,
)
from .islamchannel import (
IslamChannelIE,
@@ -838,16 +883,16 @@ from .islamchannel import (
)
from .israelnationalnews import IsraelNationalNewsIE
from .itprotv import (
+ ITProTVCourseIE,
ITProTVIE,
- ITProTVCourseIE
)
from .itv import (
- ITVIE,
ITVBTCCIE,
+ ITVIE,
)
from .ivi import (
+ IviCompilationIE,
IviIE,
- IviCompilationIE
)
from .ivideon import IvideonIE
from .iwara import (
@@ -857,30 +902,30 @@ from .iwara import (
)
from .ixigua import IxiguaIE
from .izlesene import IzleseneIE
-from .jable import (
- JableIE,
- JablePlaylistIE,
-)
from .jamendo import (
- JamendoIE,
JamendoAlbumIE,
+ JamendoIE,
)
from .japandiet import (
+ SangiinIE,
+ SangiinInstructionIE,
ShugiinItvLiveIE,
ShugiinItvLiveRoomIE,
ShugiinItvVodIE,
- SangiinInstructionIE,
- SangiinIE,
)
from .jeuxvideo import JeuxVideoIE
+from .jiocinema import (
+ JioCinemaIE,
+ JioCinemaSeriesIE,
+)
from .jiosaavn import (
- JioSaavnSongIE,
JioSaavnAlbumIE,
JioSaavnPlaylistIE,
+ JioSaavnSongIE,
)
-from .jove import JoveIE
from .joj import JojIE
from .joqrag import JoqrAgIE
+from .jove import JoveIE
from .jstream import JStreamIE
from .jtbc import (
JTBCIE,
@@ -907,17 +952,17 @@ from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE
from .koo import KooIE
-from .kth import KTHIE
from .krasview import KrasViewIE
+from .kth import KTHIE
from .ku6 import Ku6IE
from .kukululive import KukuluLiveIE
from .kuwo import (
- KuwoIE,
KuwoAlbumIE,
- KuwoChartIE,
- KuwoSingerIE,
KuwoCategoryIE,
+ KuwoChartIE,
+ KuwoIE,
KuwoMvIE,
+ KuwoSingerIE,
)
from .la7 import (
LA7IE,
@@ -937,14 +982,14 @@ from .lbry import (
)
from .lci import LCIIE
from .lcp import (
- LcpPlayIE,
LcpIE,
+ LcpPlayIE,
)
from .lecture2go import Lecture2GoIE
from .lecturio import (
- LecturioIE,
LecturioCourseIE,
LecturioDeCourseIE,
+ LecturioIE,
)
from .leeco import (
LeIE,
@@ -961,22 +1006,22 @@ from .lenta import LentaIE
from .libraryofcongress import LibraryOfCongressIE
from .libsyn import LibsynIE
from .lifenews import (
- LifeNewsIE,
LifeEmbedIE,
+ LifeNewsIE,
)
from .likee import (
LikeeIE,
- LikeeUserIE
+ LikeeUserIE,
)
from .limelight import (
- LimelightMediaIE,
LimelightChannelIE,
LimelightChannelListIE,
+ LimelightMediaIE,
)
from .linkedin import (
LinkedInIE,
- LinkedInLearningIE,
LinkedInLearningCourseIE,
+ LinkedInLearningIE,
)
from .liputan6 import Liputan6IE
from .listennotes import ListenNotesIE
@@ -993,25 +1038,23 @@ from .lnkgo import (
LnkIE,
)
from .loom import (
- LoomIE,
LoomFolderIE,
+ LoomIE,
)
from .lovehomeporn import LoveHomePornIE
from .lrt import (
LRTVODIE,
- LRTStreamIE
+ LRTStreamIE,
)
from .lsm import (
LSMLREmbedIE,
LSMLTVEmbedIE,
- LSMReplayIE
-)
-from .lumni import (
- LumniIE
+ LSMReplayIE,
)
+from .lumni import LumniIE
from .lynda import (
+ LyndaCourseIE,
LyndaIE,
- LyndaCourseIE
)
from .maariv import MaarivIE
from .magellantv import MagellanTVIE
@@ -1023,13 +1066,13 @@ from .mailru import (
)
from .mainstreaming import MainStreamingIE
from .mangomolo import (
- MangomoloVideoIE,
MangomoloLiveIE,
+ MangomoloVideoIE,
)
from .manoto import (
ManotoTVIE,
- ManotoTVShowIE,
ManotoTVLiveIE,
+ ManotoTVShowIE,
)
from .manyvids import ManyVidsIE
from .maoritv import MaoriTVIE
@@ -1045,13 +1088,14 @@ from .mdr import MDRIE
from .medaltv import MedalTVIE
from .mediaite import MediaiteIE
from .mediaklikk import MediaKlikkIE
+from .medialaan import MedialaanIE
from .mediaset import (
MediasetIE,
MediasetShowIE,
)
from .mediasite import (
- MediasiteIE,
MediasiteCatalogIE,
+ MediasiteIE,
MediasiteNamedCatalogIE,
)
from .mediastream import (
@@ -1061,26 +1105,30 @@ from .mediastream import (
from .mediaworksnz import MediaWorksNZVODIE
from .medici import MediciIE
from .megaphone import MegaphoneIE
+from .megatvcom import (
+ MegaTVComEmbedIE,
+ MegaTVComIE,
+)
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .metacritic import MetacriticIE
from .mgtv import MGTVIE
+from .microsoftembed import MicrosoftEmbedIE
from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import (
- MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE,
+ MicrosoftVirtualAcademyIE,
)
-from .microsoftembed import MicrosoftEmbedIE
from .mildom import (
- MildomIE,
- MildomVodIE,
MildomClipIE,
+ MildomIE,
MildomUserVodIE,
+ MildomVodIE,
)
from .minds import (
- MindsIE,
MindsChannelIE,
MindsGroupIE,
+ MindsIE,
)
from .minoto import MinotoIE
from .mirrativ import (
@@ -1088,31 +1136,34 @@ from .mirrativ import (
MirrativUserIE,
)
from .mirrorcouk import MirrorCoUKIE
-from .mit import TechTVMITIE, OCWMITIE
+from .mit import (
+ OCWMITIE,
+ TechTVMITIE,
+)
from .mitele import MiTeleIE
from .mixch import (
- MixchIE,
MixchArchiveIE,
+ MixchIE,
)
from .mixcloud import (
MixcloudIE,
- MixcloudUserIE,
MixcloudPlaylistIE,
+ MixcloudUserIE,
)
from .mlb import (
MLBIE,
- MLBVideoIE,
MLBTVIE,
MLBArticleIE,
+ MLBVideoIE,
)
from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .motherless import (
- MotherlessIE,
- MotherlessGroupIE,
MotherlessGalleryIE,
+ MotherlessGroupIE,
+ MotherlessIE,
MotherlessUploaderIE,
)
from .motorsport import MotorsportIE
@@ -1122,23 +1173,26 @@ from .moviezine import MoviezineIE
from .movingimage import MovingImageIE
from .msn import MSNIE
from .mtv import (
- MTVIE,
- MTVVideoIE,
- MTVServicesEmbeddedIE,
MTVDEIE,
- MTVJapanIE,
+ MTVIE,
MTVItaliaIE,
MTVItaliaProgrammaIE,
+ MTVJapanIE,
+ MTVServicesEmbeddedIE,
+ MTVVideoIE,
)
from .muenchentv import MuenchenTVIE
-from .murrtube import MurrtubeIE, MurrtubeUserIE
+from .murrtube import (
+ MurrtubeIE,
+ MurrtubeUserIE,
+)
from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (
- MusicdexSongIE,
MusicdexAlbumIE,
MusicdexArtistIE,
MusicdexPlaylistIE,
+ MusicdexSongIE,
)
from .mx3 import (
Mx3IE,
@@ -1149,7 +1203,10 @@ from .mxplayer import (
MxplayerIE,
MxplayerShowIE,
)
-from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspace import (
+ MySpaceAlbumIE,
+ MySpaceIE,
+)
from .myspass import MySpassIE
from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
@@ -1163,8 +1220,8 @@ from .nate import (
NateProgramIE,
)
from .nationalgeographic import (
- NationalGeographicVideoIE,
NationalGeographicTVIE,
+ NationalGeographicVideoIE,
)
from .naver import (
NaverIE,
@@ -1172,12 +1229,12 @@ from .naver import (
NaverNowIE,
)
from .nba import (
- NBAWatchEmbedIE,
- NBAWatchIE,
- NBAWatchCollectionIE,
- NBAEmbedIE,
NBAIE,
NBAChannelIE,
+ NBAEmbedIE,
+ NBAWatchCollectionIE,
+ NBAWatchEmbedIE,
+ NBAWatchIE,
)
from .nbc import (
NBCIE,
@@ -1191,35 +1248,35 @@ from .nbc import (
)
from .ndr import (
NDRIE,
- NJoyIE,
NDREmbedBaseIE,
NDREmbedIE,
NJoyEmbedIE,
+ NJoyIE,
)
from .ndtv import NDTVIE
from .nebula import (
- NebulaIE,
+ NebulaChannelIE,
NebulaClassIE,
+ NebulaIE,
NebulaSubscriptionsIE,
- NebulaChannelIE,
)
from .nekohacker import NekoHackerIE
from .nerdcubed import NerdCubedFeedIE
-from .netzkino import NetzkinoIE
from .neteasemusic import (
- NetEaseMusicIE,
NetEaseMusicAlbumIE,
- NetEaseMusicSingerIE,
+ NetEaseMusicDjRadioIE,
+ NetEaseMusicIE,
NetEaseMusicListIE,
NetEaseMusicMvIE,
NetEaseMusicProgramIE,
- NetEaseMusicDjRadioIE,
+ NetEaseMusicSingerIE,
)
from .netverse import (
NetverseIE,
NetversePlaylistIE,
NetverseSearchIE,
)
+from .netzkino import NetzkinoIE
from .newgrounds import (
NewgroundsIE,
NewgroundsPlaylistIE,
@@ -1228,14 +1285,14 @@ from .newgrounds import (
from .newspicks import NewsPicksIE
from .newsy import NewsyIE
from .nextmedia import (
- NextMediaIE,
- NextMediaActionNewsIE,
AppleDailyIE,
+ NextMediaActionNewsIE,
+ NextMediaIE,
NextTVIE,
)
from .nexx import (
- NexxIE,
NexxEmbedIE,
+ NexxIE,
)
from .nfb import (
NFBIE,
@@ -1249,43 +1306,43 @@ from .nfl import (
NFLPlusReplayIE,
)
from .nhk import (
- NhkVodIE,
- NhkVodProgramIE,
NhkForSchoolBangumiIE,
- NhkForSchoolSubjectIE,
NhkForSchoolProgramListIE,
+ NhkForSchoolSubjectIE,
NhkRadioNewsPageIE,
NhkRadiruIE,
NhkRadiruLiveIE,
+ NhkVodIE,
+ NhkVodProgramIE,
)
from .nhl import NHLIE
from .nick import (
- NickIE,
NickBrIE,
NickDeIE,
+ NickIE,
NickRuIE,
)
from .niconico import (
+ NiconicoHistoryIE,
NiconicoIE,
+ NiconicoLiveIE,
NiconicoPlaylistIE,
- NiconicoUserIE,
NiconicoSeriesIE,
- NiconicoHistoryIE,
+ NiconicoUserIE,
NicovideoSearchDateIE,
NicovideoSearchIE,
NicovideoSearchURLIE,
NicovideoTagURLIE,
- NiconicoLiveIE,
+)
+from .niconicochannelplus import (
+ NiconicoChannelPlusChannelLivesIE,
+ NiconicoChannelPlusChannelVideosIE,
+ NiconicoChannelPlusIE,
)
from .ninaprotocol import NinaProtocolIE
from .ninecninemedia import (
- NineCNineMediaIE,
CPTwentyFourIE,
-)
-from .niconicochannelplus import (
- NiconicoChannelPlusIE,
- NiconicoChannelPlusChannelVideosIE,
- NiconicoChannelPlusChannelLivesIE,
+ NineCNineMediaIE,
)
from .ninegag import NineGagIE
from .ninenews import NineNewsIE
@@ -1310,46 +1367,47 @@ from .nowness import (
)
from .noz import NozIE
from .npo import (
- AndereTijdenIE,
NPOIE,
+ VPROIE,
+ WNLIE,
+ AndereTijdenIE,
+ HetKlokhuisIE,
NPOLiveIE,
- NPORadioIE,
NPORadioFragmentIE,
+ NPORadioIE,
SchoolTVIE,
- HetKlokhuisIE,
- VPROIE,
- WNLIE,
)
from .npr import NprIE
from .nrk import (
NRKIE,
+ NRKTVIE,
NRKPlaylistIE,
+ NRKRadioPodkastIE,
NRKSkoleIE,
- NRKTVIE,
NRKTVDirekteIE,
- NRKRadioPodkastIE,
NRKTVEpisodeIE,
NRKTVEpisodesIE,
NRKTVSeasonIE,
NRKTVSeriesIE,
)
from .nrl import NRLTVIE
+from .nts import NTSLiveIE
from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nubilesporn import NubilesPornIE
-from .nytimes import (
- NYTimesIE,
- NYTimesArticleIE,
- NYTimesCookingIE,
- NYTimesCookingRecipeIE,
-)
from .nuum import (
NuumLiveIE,
- NuumTabIE,
NuumMediaIE,
+ NuumTabIE,
)
from .nuvid import NuvidIE
+from .nytimes import (
+ NYTimesArticleIE,
+ NYTimesCookingIE,
+ NYTimesCookingRecipeIE,
+ NYTimesIE,
+)
from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE
@@ -1357,7 +1415,7 @@ from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE
from .oftv import (
OfTVIE,
- OfTVPlaylistIE
+ OfTVPlaylistIE,
)
from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE
@@ -1370,8 +1428,8 @@ from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE
from .onet import (
- OnetIE,
OnetChannelIE,
+ OnetIE,
OnetMVPIE,
OnetPlIE,
)
@@ -1381,34 +1439,33 @@ from .opencast import (
OpencastPlaylistIE,
)
from .openrec import (
- OpenRecIE,
OpenRecCaptureIE,
+ OpenRecIE,
OpenRecMovieIE,
)
from .ora import OraTVIE
from .orf import (
- ORFTVthekIE,
- ORFFM4StoryIE,
+ ORFIPTVIE,
ORFONIE,
- ORFRadioIE,
+ ORFFM4StoryIE,
ORFPodcastIE,
- ORFIPTVIE,
+ ORFRadioIE,
)
from .outsidetv import OutsideTVIE
from .owncloud import OwnCloudIE
from .packtpub import (
- PacktPubIE,
PacktPubCourseIE,
+ PacktPubIE,
)
from .palcomp3 import (
- PalcoMP3IE,
PalcoMP3ArtistIE,
+ PalcoMP3IE,
PalcoMP3VideoIE,
)
from .panopto import (
PanoptoIE,
PanoptoListIE,
- PanoptoPlaylistIE
+ PanoptoPlaylistIE,
)
from .paramountplus import (
ParamountPlusIE,
@@ -1417,12 +1474,18 @@ from .paramountplus import (
from .parler import ParlerIE
from .parlview import ParlviewIE
from .patreon import (
+ PatreonCampaignIE,
PatreonIE,
- PatreonCampaignIE
)
-from .pbs import PBSIE, PBSKidsIE
+from .pbs import (
+ PBSIE,
+ PBSKidsIE,
+)
from .pearvideo import PearVideoIE
-from .peekvids import PeekVidsIE, PlayVidsIE
+from .peekvids import (
+ PeekVidsIE,
+ PlayVidsIE,
+)
from .peertube import (
PeerTubeIE,
PeerTubePlaylistIE,
@@ -1430,7 +1493,7 @@ from .peertube import (
from .peertv import PeerTVIE
from .peloton import (
PelotonIE,
- PelotonLiveIE
+ PelotonLiveIE,
)
from .performgroup import PerformGroupIE
from .periscope import (
@@ -1450,8 +1513,8 @@ from .picarto import (
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pinterest import (
- PinterestIE,
PinterestCollectionIE,
+ PinterestIE,
)
from .pixivsketch import (
PixivSketchIE,
@@ -1460,19 +1523,22 @@ from .pixivsketch import (
from .pladform import PladformIE
from .planetmarathi import PlanetMarathiIE
from .platzi import (
- PlatziIE,
PlatziCourseIE,
+ PlatziIE,
)
from .playplustv import PlayPlusTVIE
from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE
from .playwire import PlaywireIE
-from .plutotv import PlutoTVIE
from .pluralsight import (
- PluralsightIE,
PluralsightCourseIE,
+ PluralsightIE,
+)
+from .plutotv import PlutoTVIE
+from .podbayfm import (
+ PodbayFMChannelIE,
+ PodbayFMIE,
)
-from .podbayfm import PodbayFMIE, PodbayFMChannelIE
from .podchaser import PodchaserIE
from .podomatic import PodomaticIE
from .pokemon import (
@@ -1480,77 +1546,81 @@ from .pokemon import (
PokemonWatchIE,
)
from .pokergo import (
- PokerGoIE,
PokerGoCollectionIE,
+ PokerGoIE,
)
from .polsatgo import PolsatGoIE
from .polskieradio import (
- PolskieRadioIE,
- PolskieRadioLegacyIE,
PolskieRadioAuditionIE,
PolskieRadioCategoryIE,
+ PolskieRadioIE,
+ PolskieRadioLegacyIE,
PolskieRadioPlayerIE,
PolskieRadioPodcastIE,
PolskieRadioPodcastListIE,
)
from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
from .pornbox import PornboxIE
from .pornflip import PornFlipIE
from .pornhub import (
PornHubIE,
- PornHubUserIE,
- PornHubPlaylistIE,
PornHubPagedVideoListIE,
+ PornHubPlaylistIE,
+ PornHubUserIE,
PornHubUserVideosUploadIE,
)
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
-from .puhutv import (
- PuhuTVIE,
- PuhuTVSerieIE,
-)
from .pr0gramm import Pr0grammIE
-from .prankcast import PrankCastIE, PrankCastPostIE
+from .prankcast import (
+ PrankCastIE,
+ PrankCastPostIE,
+)
from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE
from .prosiebensat1 import ProSiebenSat1IE
from .prx import (
- PRXStoryIE,
- PRXSeriesIE,
PRXAccountIE,
+ PRXSeriesIE,
+ PRXSeriesSearchIE,
PRXStoriesSearchIE,
- PRXSeriesSearchIE
+ PRXStoryIE,
+)
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE
from .qqmusic import (
+ QQMusicAlbumIE,
QQMusicIE,
+ QQMusicPlaylistIE,
QQMusicSingerIE,
- QQMusicAlbumIE,
QQMusicToplistIE,
- QQMusicPlaylistIE,
)
from .r7 import (
R7IE,
R7ArticleIE,
)
-from .radiko import RadikoIE, RadikoRadioIE
+from .radiko import (
+ RadikoIE,
+ RadikoRadioIE,
+)
from .radiocanada import (
- RadioCanadaIE,
RadioCanadaAudioVideoIE,
+ RadioCanadaIE,
)
from .radiocomercial import (
RadioComercialIE,
RadioComercialPlaylistIE,
)
from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
from .radiofrance import (
FranceCultureIE,
RadioFranceIE,
@@ -1559,35 +1629,36 @@ from .radiofrance import (
RadioFranceProfileIE,
RadioFranceProgramScheduleIE,
)
-from .radiozet import RadioZetPodcastIE
+from .radiojavan import RadioJavanIE
from .radiokapital import (
RadioKapitalIE,
RadioKapitalShowIE,
)
+from .radiozet import RadioZetPodcastIE
from .radlive import (
- RadLiveIE,
RadLiveChannelIE,
+ RadLiveIE,
RadLiveSeasonIE,
)
from .rai import (
- RaiIE,
RaiCulturaIE,
+ RaiIE,
+ RaiNewsIE,
RaiPlayIE,
RaiPlayLiveIE,
RaiPlayPlaylistIE,
RaiPlaySoundIE,
RaiPlaySoundLiveIE,
RaiPlaySoundPlaylistIE,
- RaiNewsIE,
RaiSudtirolIE,
)
from .raywenderlich import (
- RayWenderlichIE,
RayWenderlichCourseIE,
+ RayWenderlichIE,
)
from .rbgtum import (
- RbgTumIE,
RbgTumCourseIE,
+ RbgTumIE,
RbgTumNewCourseIE,
)
from .rcs import (
@@ -1601,12 +1672,15 @@ from .rcti import (
RCTIPlusTVIE,
)
from .rds import RDSIE
-from .redbee import ParliamentLiveUKIE, RTBFIE
+from .redbee import (
+ RTBFIE,
+ ParliamentLiveUKIE,
+)
from .redbulltv import (
- RedBullTVIE,
RedBullEmbedIE,
- RedBullTVRrnContentIE,
RedBullIE,
+ RedBullTVIE,
+ RedBullTVRrnContentIE,
)
from .reddit import RedditIE
from .redge import RedCDNLivxIE
@@ -1626,107 +1700,100 @@ from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .ridehome import RideHomeIE
from .rinsefm import (
- RinseFMIE,
RinseFMArtistPlaylistIE,
+ RinseFMIE,
)
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
- RokfinIE,
- RokfinStackIE,
RokfinChannelIE,
+ RokfinIE,
RokfinSearchIE,
+ RokfinStackIE,
+)
+from .roosterteeth import (
+ RoosterTeethIE,
+ RoosterTeethSeriesIE,
)
-from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
from .rottentomatoes import RottenTomatoesIE
from .rozhlas import (
+ MujRozhlasIE,
RozhlasIE,
RozhlasVltavaIE,
- MujRozhlasIE,
)
-from .rte import RteIE, RteRadioIE
+from .rte import (
+ RteIE,
+ RteRadioIE,
+)
+from .rtl2 import RTL2IE
from .rtlnl import (
- RtlNlIE,
- RTLLuTeleVODIE,
RTLLuArticleIE,
RTLLuLiveIE,
RTLLuRadioIE,
+ RTLLuTeleVODIE,
+ RtlNlIE,
)
-from .rtl2 import RTL2IE
from .rtnews import (
- RTNewsIE,
RTDocumentryIE,
RTDocumentryPlaylistIE,
+ RTNewsIE,
RuptlyIE,
)
from .rtp import RTPIE
from .rtrfm import RTRFMIE
from .rts import RTSIE
from .rtvcplay import (
- RTVCPlayIE,
- RTVCPlayEmbedIE,
RTVCKalturaIE,
+ RTVCPlayEmbedIE,
+ RTVCPlayIE,
)
from .rtve import (
RTVEALaCartaIE,
RTVEAudioIE,
- RTVELiveIE,
RTVEInfantilIE,
+ RTVELiveIE,
RTVETelevisionIE,
)
from .rtvs import RTVSIE
from .rtvslo import RTVSLOIE
+from .rudovideo import RudoVideoIE
from .rule34video import Rule34VideoIE
from .rumble import (
+ RumbleChannelIE,
RumbleEmbedIE,
RumbleIE,
- RumbleChannelIE,
)
-from .rudovideo import RudoVideoIE
from .rutube import (
- RutubeIE,
RutubeChannelIE,
RutubeEmbedIE,
+ RutubeIE,
RutubeMovieIE,
RutubePersonIE,
RutubePlaylistIE,
RutubeTagsIE,
)
-from .glomex import (
- GlomexIE,
- GlomexEmbedIE,
-)
-from .megatvcom import (
- MegaTVComIE,
- MegaTVComEmbedIE,
-)
-from .antenna import (
- AntennaGrWatchIE,
- Ant1NewsGrArticleIE,
- Ant1NewsGrEmbedIE,
-)
from .rutv import RUTVIE
from .ruutu import RuutuIE
from .ruv import (
RuvIE,
- RuvSpilaIE
+ RuvSpilaIE,
)
from .s4c import (
S4CIE,
- S4CSeriesIE
+ S4CSeriesIE,
)
from .safari import (
- SafariIE,
SafariApiIE,
SafariCourseIE,
+ SafariIE,
)
from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE
from .sapo import SapoIE
from .sbs import SBSIE
from .sbscokr import (
- SBSCoKrIE,
SBSCoKrAllvodProgramIE,
+ SBSCoKrIE,
SBSCoKrProgramsVodIE,
)
from .screen9 import Screen9IE
@@ -1734,24 +1801,27 @@ from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE
from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import (
- ScrippsNetworksWatchIE,
ScrippsNetworksIE,
+ ScrippsNetworksWatchIE,
)
+from .scrolller import ScrolllerIE
from .scte import (
SCTEIE,
SCTECourseIE,
)
-from .scrolller import ScrolllerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
-from .senategov import SenateISVPIE, SenateGovIE
+from .senategov import (
+ SenateGovIE,
+ SenateISVPIE,
+)
from .sendtonews import SendtoNewsIE
from .servus import ServusIE
from .sevenplus import SevenPlusIE
from .sexu import SexuIE
from .seznamzpravy import (
- SeznamZpravyIE,
SeznamZpravyArticleIE,
+ SeznamZpravyIE,
)
from .shahid import (
ShahidIE,
@@ -1759,38 +1829,38 @@ from .shahid import (
)
from .sharepoint import SharePointIE
from .sharevideos import ShareVideosEmbedIE
-from .sibnet import SibnetEmbedIE
from .shemaroome import ShemarooMeIE
from .showroomlive import ShowRoomLiveIE
+from .sibnet import SibnetEmbedIE
from .simplecast import (
- SimplecastIE,
SimplecastEpisodeIE,
+ SimplecastIE,
SimplecastPodcastIE,
)
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skeb import SkebIE
+from .sky import (
+ SkyNewsIE,
+ SkyNewsStoryIE,
+ SkySportsIE,
+ SkySportsNewsIE,
+)
from .skyit import (
+ CieloTVItIE,
+ SkyItArteIE,
+ SkyItIE,
SkyItPlayerIE,
SkyItVideoIE,
SkyItVideoLiveIE,
- SkyItIE,
- SkyItArteIE,
- CieloTVItIE,
TV8ItIE,
)
from .skylinewebcams import SkylineWebcamsIE
from .skynewsarabia import (
- SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
+ SkyNewsArabiaIE,
)
from .skynewsau import SkyNewsAUIE
-from .sky import (
- SkyNewsIE,
- SkyNewsStoryIE,
- SkySportsIE,
- SkySportsNewsIE,
-)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
@@ -1807,29 +1877,29 @@ from .sonyliv import (
from .soundcloud import (
SoundcloudEmbedIE,
SoundcloudIE,
- SoundcloudSetIE,
+ SoundcloudPlaylistIE,
SoundcloudRelatedIE,
+ SoundcloudSearchIE,
+ SoundcloudSetIE,
+ SoundcloudTrackStationIE,
SoundcloudUserIE,
SoundcloudUserPermalinkIE,
- SoundcloudTrackStationIE,
- SoundcloudPlaylistIE,
- SoundcloudSearchIE,
)
from .soundgasm import (
SoundgasmIE,
- SoundgasmProfileIE
+ SoundgasmProfileIE,
)
from .southpark import (
- SouthParkIE,
SouthParkDeIE,
SouthParkDkIE,
SouthParkEsIE,
+ SouthParkIE,
SouthParkLatIE,
- SouthParkNlIE
+ SouthParkNlIE,
)
from .sovietscloset import (
SovietsClosetIE,
- SovietsClosetPlaylistIE
+ SovietsClosetPlaylistIE,
)
from .spankbang import (
SpankBangIE,
@@ -1840,12 +1910,6 @@ from .spike import (
BellatorIE,
ParamountNetworkIE,
)
-from .stageplus import StagePlusVODConcertIE
-from .startrek import StarTrekIE
-from .stitcher import (
- StitcherIE,
- StitcherShowIE,
-)
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
@@ -1869,19 +1933,25 @@ from .srmediathek import SRMediathekIE
from .stacommu import (
StacommuLiveIE,
StacommuVODIE,
- TheaterComplexTownVODIE,
TheaterComplexTownPPVIE,
+ TheaterComplexTownVODIE,
)
+from .stageplus import StagePlusVODConcertIE
from .stanfordoc import StanfordOpenClassroomIE
+from .startrek import StarTrekIE
from .startv import StarTVIE
from .steam import (
- SteamIE,
SteamCommunityBroadcastIE,
+ SteamIE,
+)
+from .stitcher import (
+ StitcherIE,
+ StitcherShowIE,
)
from .storyfire import (
StoryFireIE,
- StoryFireUserIE,
StoryFireSeriesIE,
+ StoryFireUserIE,
)
from .streamable import StreamableIE
from .streamcz import StreamCZIE
@@ -1902,20 +1972,26 @@ from .svt import (
SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
-from .syvdk import SYVDKIE
from .syfy import SyfyIE
+from .syvdk import SYVDKIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
+from .taptap import (
+ TapTapAppIE,
+ TapTapAppIntlIE,
+ TapTapMomentIE,
+ TapTapPostIntlIE,
+)
from .tass import TassIE
from .tbs import TBSIE
from .tbsjp import (
TBSJPEpisodeIE,
- TBSJPProgramIE,
TBSJPPlaylistIE,
+ TBSJPProgramIE,
)
from .teachable import (
- TeachableIE,
TeachableCourseIE,
+ TeachableIE,
)
from .teachertube import (
TeacherTubeIE,
@@ -1923,8 +1999,8 @@ from .teachertube import (
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import (
- TeamcocoIE,
ConanClassicIE,
+ TeamcocoIE,
)
from .teamtreehouse import TeamTreeHouseIE
from .ted import (
@@ -1943,15 +2019,18 @@ from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (
- TeleQuebecIE,
- TeleQuebecSquatIE,
TeleQuebecEmissionIE,
+ TeleQuebecIE,
TeleQuebecLiveIE,
+ TeleQuebecSquatIE,
TeleQuebecVideoIE,
)
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
-from .tempo import TempoIE, IVXPlayerIE
+from .tempo import (
+ IVXPlayerIE,
+ TempoIE,
+)
from .tencent import (
IflixEpisodeIE,
IflixSeriesIE,
@@ -1975,8 +2054,8 @@ from .theguardian import (
from .theholetv import TheHoleTvIE
from .theintercept import TheInterceptIE
from .theplatform import (
- ThePlatformIE,
ThePlatformFeedIE,
+ ThePlatformIE,
)
from .thestar import TheStarIE
from .thesun import TheSunIE
@@ -1988,50 +2067,52 @@ from .thisvid import (
ThisVidMemberIE,
ThisVidPlaylistIE,
)
+from .threeqsdn import ThreeQSDNIE
from .threespeak import (
ThreeSpeakIE,
ThreeSpeakUserIE,
)
-from .threeqsdn import ThreeQSDNIE
from .tiktok import (
+ DouyinIE,
+ TikTokCollectionIE,
+ TikTokEffectIE,
TikTokIE,
- TikTokUserIE,
+ TikTokLiveIE,
TikTokSoundIE,
- TikTokEffectIE,
TikTokTagIE,
+ TikTokUserIE,
TikTokVMIE,
- TikTokLiveIE,
- DouyinIE,
)
from .tmz import TMZIE
from .tnaflix import (
- TNAFlixNetworkEmbedIE,
- TNAFlixIE,
EMPFlixIE,
MovieFapIE,
+ TNAFlixIE,
+ TNAFlixNetworkEmbedIE,
)
from .toggle import (
- ToggleIE,
MeWatchIE,
+ ToggleIE,
)
-from .toggo import (
- ToggoIE,
-)
+from .toggo import ToggoIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
+from .toypics import (
+ ToypicsIE,
+ ToypicsUserIE,
+)
from .traileraddict import TrailerAddictIE
from .triller import (
TrillerIE,
- TrillerUserIE,
TrillerShortIE,
+ TrillerUserIE,
)
from .trovo import (
+ TrovoChannelClipIE,
+ TrovoChannelVodIE,
TrovoIE,
TrovoVodIE,
- TrovoChannelVodIE,
- TrovoChannelClipIE,
)
from .trtcocuk import TrtCocukVideoIE
from .trtworld import TrtWorldIE
@@ -2040,26 +2121,26 @@ from .trunews import TruNewsIE
from .truth import TruthIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
-from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE
+from .tubetugraz import (
+ TubeTuGrazIE,
+ TubeTuGrazSeriesIE,
+)
from .tubitv import (
TubiTvIE,
TubiTvShowIE,
)
from .tumblr import TumblrIE
from .tunein import (
- TuneInStationIE,
- TuneInPodcastIE,
TuneInPodcastEpisodeIE,
+ TuneInPodcastIE,
TuneInShortenerIE,
+ TuneInStationIE,
)
from .tv2 import (
TV2IE,
- TV2ArticleIE,
KatsomoIE,
MTVUutisetArticleIE,
-)
-from .tv24ua import (
- TV24UAVideoIE,
+ TV2ArticleIE,
)
from .tv2dk import (
TV2DKIE,
@@ -2072,16 +2153,17 @@ from .tv2hu import (
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tv5unis import (
- TV5UnisVideoIE,
TV5UnisIE,
+ TV5UnisVideoIE,
)
+from .tv24ua import TV24UAVideoIE
from .tva import (
TVAIE,
QubIE,
)
from .tvanouvelles import (
- TVANouvellesIE,
TVANouvellesArticleIE,
+ TVANouvellesIE,
)
from .tvc import (
TVCIE,
@@ -2094,19 +2176,19 @@ from .tvland import TVLandIE
from .tvn24 import TVN24IE
from .tvnoe import TVNoeIE
from .tvopengr import (
- TVOpenGrWatchIE,
TVOpenGrEmbedIE,
+ TVOpenGrWatchIE,
)
from .tvp import (
- TVPEmbedIE,
TVPIE,
+ TVPEmbedIE,
TVPStreamIE,
TVPVODSeriesIE,
TVPVODVideoIE,
)
from .tvplay import (
- TVPlayIE,
TVPlayHomeIE,
+ TVPlayIE,
)
from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
@@ -2118,29 +2200,29 @@ from .twitcasting import (
TwitCastingUserIE,
)
from .twitch import (
- TwitchVodIE,
+ TwitchClipsIE,
TwitchCollectionIE,
- TwitchVideosIE,
+ TwitchStreamIE,
TwitchVideosClipsIE,
TwitchVideosCollectionsIE,
- TwitchStreamIE,
- TwitchClipsIE,
+ TwitchVideosIE,
+ TwitchVodIE,
)
from .twitter import (
- TwitterCardIE,
- TwitterIE,
TwitterAmplifyIE,
TwitterBroadcastIE,
- TwitterSpacesIE,
+ TwitterCardIE,
+ TwitterIE,
TwitterShortenerIE,
+ TwitterSpacesIE,
)
from .txxx import (
- TxxxIE,
PornTopIE,
+ TxxxIE,
)
from .udemy import (
+ UdemyCourseIE,
UdemyIE,
- UdemyCourseIE
)
from .udn import UDNEmbedIE
from .ufctv import (
@@ -2149,16 +2231,13 @@ from .ufctv import (
)
from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE
-from .digiteka import DigitekaIE
-from .dlive import (
- DLiveVODIE,
- DLiveStreamIE,
-)
-from .drooble import DroobleIE
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
-from .unsupported import KnownDRMIE, KnownPiracyIE
+from .unsupported import (
+ KnownDRMIE,
+ KnownPiracyIE,
+)
from .uol import UOLIE
from .uplynk import (
UplynkIE,
@@ -2168,10 +2247,13 @@ from .urort import UrortIE
from .urplay import URPlayIE
from .usanetwork import USANetworkIE
from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
+from .ustream import (
+ UstreamChannelIE,
+ UstreamIE,
+)
from .ustudio import (
- UstudioIE,
UstudioEmbedIE,
+ UstudioIE,
)
from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE
@@ -2179,7 +2261,7 @@ from .vbox7 import Vbox7IE
from .veo import VeoIE
from .veoh import (
VeohIE,
- VeohUserIE
+ VeohUserIE,
)
from .vesti import VestiIE
from .vevo import (
@@ -2187,14 +2269,14 @@ from .vevo import (
VevoPlaylistIE,
)
from .vgtv import (
+ VGTVIE,
BTArticleIE,
BTVestlendingenIE,
- VGTVIE,
)
from .vh1 import VH1IE
from .vice import (
- ViceIE,
ViceArticleIE,
+ ViceIE,
ViceShowIE,
)
from .viddler import ViddlerIE
@@ -2206,42 +2288,46 @@ from .videocampus_sachsen import (
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videoken import (
+ VideoKenCategoryIE,
VideoKenIE,
VideoKenPlayerIE,
VideoKenPlaylistIE,
- VideoKenCategoryIE,
VideoKenTopicIE,
)
from .videomore import (
VideomoreIE,
- VideomoreVideoIE,
VideomoreSeasonIE,
+ VideomoreVideoIE,
)
from .videopress import VideoPressIE
from .vidio import (
VidioIE,
+ VidioLiveIE,
VidioPremierIE,
- VidioLiveIE
)
from .vidlii import VidLiiIE
from .vidly import VidlyIE
from .viewlift import (
- ViewLiftIE,
ViewLiftEmbedIE,
+ ViewLiftIE,
)
from .viidea import ViideaIE
+from .viki import (
+ VikiChannelIE,
+ VikiIE,
+)
from .vimeo import (
- VimeoIE,
+ VHXEmbedIE,
VimeoAlbumIE,
VimeoChannelIE,
VimeoGroupsIE,
+ VimeoIE,
VimeoLikesIE,
VimeoOndemandIE,
VimeoProIE,
VimeoReviewIE,
VimeoUserIE,
VimeoWatchLaterIE,
- VHXEmbedIE,
)
from .vimm import (
VimmIE,
@@ -2251,50 +2337,41 @@ from .vine import (
VineIE,
VineUserIE,
)
-from .viki import (
- VikiIE,
- VikiChannelIE,
-)
from .viously import ViouslyIE
from .viqeo import ViqeoIE
from .viu import (
ViuIE,
- ViuPlaylistIE,
ViuOTTIE,
ViuOTTIndonesiaIE,
+ ViuPlaylistIE,
)
from .vk import (
VKIE,
- VKUserVideosIE,
- VKWallPostIE,
VKPlayIE,
VKPlayLiveIE,
+ VKUserVideosIE,
+ VKWallPostIE,
)
from .vocaroo import VocarooIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicy import (
- VoicyIE,
VoicyChannelIE,
+ VoicyIE,
)
from .volejtv import VolejTVIE
-from .voot import (
- VootIE,
- VootSeriesIE,
-)
from .voxmedia import (
- VoxMediaVolumeIE,
VoxMediaIE,
+ VoxMediaVolumeIE,
)
from .vrt import (
VRTIE,
- VrtNUIE,
- KetnetIE,
DagelijkseKostIE,
+ KetnetIE,
Radio1BeIE,
+ VrtNUIE,
)
from .vtm import VTMIE
-from .medialaan import MedialaanIE
from .vuclip import VuClipIE
from .vvvvid import (
VVVVIDIE,
@@ -2302,20 +2379,20 @@ from .vvvvid import (
)
from .walla import WallaIE
from .washingtonpost import (
- WashingtonPostIE,
WashingtonPostArticleIE,
+ WashingtonPostIE,
)
from .wat import WatIE
from .wdr import (
WDRIE,
- WDRPageIE,
WDRElefantIE,
WDRMobileIE,
+ WDRPageIE,
)
from .webcamerapl import WebcameraplIE
from .webcaster import (
- WebcasterIE,
WebcasterFeedIE,
+ WebcasterIE,
)
from .webofstories import (
WebOfStoriesIE,
@@ -2323,42 +2400,42 @@ from .webofstories import (
)
from .weibo import (
WeiboIE,
- WeiboVideoIE,
WeiboUserIE,
+ WeiboVideoIE,
)
from .weiqitv import WeiqiTVIE
from .weverse import (
WeverseIE,
- WeverseMediaIE,
- WeverseMomentIE,
+ WeverseLiveIE,
WeverseLiveTabIE,
+ WeverseMediaIE,
WeverseMediaTabIE,
- WeverseLiveIE,
+ WeverseMomentIE,
)
from .wevidi import WeVidiIE
from .weyyak import WeyyakIE
+from .whowatch import WhoWatchIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE
from .wimbledon import WimbledonIE
from .wimtv import WimTVIE
-from .whowatch import WhoWatchIE
from .wistia import (
+ WistiaChannelIE,
WistiaIE,
WistiaPlaylistIE,
- WistiaChannelIE,
)
from .wordpress import (
- WordpressPlaylistEmbedIE,
WordpressMiniAudioPlayerEmbedIE,
+ WordpressPlaylistEmbedIE,
)
from .worldstarhiphop import WorldStarHipHopIE
from .wppilot import (
- WPPilotIE,
WPPilotChannelsIE,
+ WPPilotIE,
)
from .wrestleuniverse import (
- WrestleUniverseVODIE,
WrestleUniversePPVIE,
+ WrestleUniverseVODIE,
)
from .wsj import (
WSJIE,
@@ -2366,22 +2443,22 @@ from .wsj import (
)
from .wwe import WWEIE
from .wykop import (
- WykopDigIE,
WykopDigCommentIE,
- WykopPostIE,
+ WykopDigIE,
WykopPostCommentIE,
+ WykopPostIE,
)
from .xanimu import XanimuIE
from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
from .xhamster import (
- XHamsterIE,
XHamsterEmbedIE,
+ XHamsterIE,
XHamsterUserIE,
)
+from .xiaohongshu import XiaoHongShuIE
from .ximalaya import (
+ XimalayaAlbumIE,
XimalayaIE,
- XimalayaAlbumIE
)
from .xinpianchang import XinpianchangIE
from .xminus import XMinusIE
@@ -2389,27 +2466,27 @@ from .xnxx import XNXXIE
from .xstream import XstreamIE
from .xvideos import (
XVideosIE,
- XVideosQuickiesIE
+ XVideosQuickiesIE,
)
from .xxxymovies import XXXYMoviesIE
from .yahoo import (
YahooIE,
- YahooSearchIE,
YahooJapanNewsIE,
+ YahooSearchIE,
)
from .yandexdisk import YandexDiskIE
from .yandexmusic import (
- YandexMusicTrackIE,
YandexMusicAlbumIE,
- YandexMusicPlaylistIE,
- YandexMusicArtistTracksIE,
YandexMusicArtistAlbumsIE,
+ YandexMusicArtistTracksIE,
+ YandexMusicPlaylistIE,
+ YandexMusicTrackIE,
)
from .yandexvideo import (
YandexVideoIE,
YandexVideoPreviewIE,
- ZenYandexIE,
ZenYandexChannelIE,
+ ZenYandexIE,
)
from .yapfiles import YapFilesIE
from .yappy import (
@@ -2423,26 +2500,34 @@ from .youku import (
YoukuShowIE,
)
from .younow import (
- YouNowLiveIE,
YouNowChannelIE,
+ YouNowLiveIE,
YouNowMomentIE,
)
-from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
+from .youporn import (
+ YouPornCategoryIE,
+ YouPornChannelIE,
+ YouPornCollectionIE,
+ YouPornIE,
+ YouPornStarIE,
+ YouPornTagIE,
+ YouPornVideosIE,
+)
from .zaiko import (
- ZaikoIE,
ZaikoETicketIE,
+ ZaikoIE,
)
from .zapiks import ZapiksIE
from .zattoo import (
BBVTVIE,
+ EWETVIE,
+ SAKTVIE,
+ VTXTVIE,
BBVTVLiveIE,
BBVTVRecordingsIE,
EinsUndEinsTVIE,
EinsUndEinsTVLiveIE,
EinsUndEinsTVRecordingsIE,
- EWETVIE,
EWETVLiveIE,
EWETVRecordingsIE,
GlattvisionTVIE,
@@ -2460,13 +2545,11 @@ from .zattoo import (
QuantumTVIE,
QuantumTVLiveIE,
QuantumTVRecordingsIE,
+ SAKTVLiveIE,
+ SAKTVRecordingsIE,
SaltTVIE,
SaltTVLiveIE,
SaltTVRecordingsIE,
- SAKTVIE,
- SAKTVLiveIE,
- SAKTVRecordingsIE,
- VTXTVIE,
VTXTVLiveIE,
VTXTVRecordingsIE,
WalyTVIE,
@@ -2477,7 +2560,10 @@ from .zattoo import (
ZattooMoviesIE,
ZattooRecordingsIE,
)
-from .zdf import ZDFIE, ZDFChannelIE
+from .zdf import (
+ ZDFIE,
+ ZDFChannelIE,
+)
from .zee5 import (
Zee5IE,
Zee5SeriesIE,
@@ -2487,16 +2573,16 @@ from .zenporn import ZenPornIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (
- ZingMp3IE,
ZingMp3AlbumIE,
ZingMp3ChartHomeIE,
- ZingMp3WeekChartIE,
ZingMp3ChartMusicVideoIE,
- ZingMp3UserIE,
ZingMp3HubIE,
+ ZingMp3IE,
ZingMp3LiveRadioIE,
ZingMp3PodcastEpisodeIE,
ZingMp3PodcastIE,
+ ZingMp3UserIE,
+ ZingMp3WeekChartIE,
)
from .zoom import ZoomIE
from .zype import ZypeIE
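
The _extractors.py churn above is mechanical: single-line imports with several names are expanded to one name per line with a trailing comma, and every multi-name import is re-sorted into isort's default member order, where ALL-CAPS names (e.g. RTBFIE, TVPIE, SAKTVIE) are grouped before mixed-case ones and each group is alphabetised case-sensitively. A rough standalone checker for that ordering; the helper and its two-tier sort key are an editorial assumption inferred from the diff, not part of the commit:

    # check_import_order.py - hypothetical helper, not part of this commit
    import re
    import sys

    def sort_key(name):
        # ALL-CAPS names first (isort treats them as constants),
        # e.g. 'RTBFIE' sorts before 'ParliamentLiveUKIE'
        return (not name.isupper(), name)

    def unsorted_blocks(source):
        # find every parenthesised `from .mod import (...)` block
        for match in re.finditer(r'from \.[\w.]+ import \(([^)]*)\)', source):
            names = [line.strip(' ,') for line in match.group(1).splitlines() if line.strip(' ,')]
            if names != sorted(names, key=sort_key):
                yield names

    if __name__ == '__main__':
        with open(sys.argv[1], encoding='utf-8') as f:
            for block in unsorted_blocks(f.read()):
                print('unsorted:', ', '.join(block))
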
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index b217422..2c0d296 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -6,10 +6,10 @@ import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- dict_get,
ExtractorError,
- js_to_json,
+ dict_get,
int_or_none,
+ js_to_json,
parse_iso8601,
str_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index fee7375..b8c79b9 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -12,20 +12,21 @@ import urllib.parse
import urllib.request
import urllib.response
import uuid
-from ..utils.networking import clean_proxies
+
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
ExtractorError,
+ OnDemandPagedList,
bytes_to_intlist,
decode_base_n,
int_or_none,
intlist_to_bytes,
- OnDemandPagedList,
time_seconds,
traverse_obj,
update_url_query,
)
+from ..utils.networking import clean_proxies
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py
index c3b4f43..0793319 100644
--- a/yt_dlp/extractor/acfun.py
+++ b/yt_dlp/extractor/acfun.py
@@ -3,10 +3,10 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
- str_or_none,
- traverse_obj,
parse_codecs,
parse_qs,
+ str_or_none,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py
index 898d372..2f3b67d 100644
--- a/yt_dlp/extractor/adn.py
+++ b/yt_dlp/extractor/adn.py
@@ -10,18 +10,18 @@ from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long,
- ExtractorError,
float_or_none,
int_or_none,
intlist_to_bytes,
long_to_bytes,
parse_iso8601,
pkcs1pad,
- strip_or_none,
str_or_none,
+ strip_or_none,
try_get,
unified_strdate,
urlencode_postdata,
diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py
index d1525a1..08e9e51 100644
--- a/yt_dlp/extractor/adobetv.py
+++ b/yt_dlp/extractor/adobetv.py
@@ -4,11 +4,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ISO639Utils,
+ OnDemandPagedList,
float_or_none,
int_or_none,
- ISO639Utils,
join_nonempty,
- OnDemandPagedList,
parse_duration,
str_or_none,
str_to_int,
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index 2c33c90..3e5738f 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -8,9 +8,11 @@ from ..utils import (
determine_ext,
filter_dict,
int_or_none,
+ orderedSet,
unified_timestamp,
url_or_none,
urlencode_postdata,
+ urljoin,
)
from ..utils.traversal import traverse_obj
@@ -276,6 +278,47 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
}]
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
+ _WORKING_CDNS = [
+ 'gcp_cdn', # live-global-cdn-v02.afreecatv.com
+ 'gs_cdn_pc_app', # pc-app.stream.afreecatv.com
+ 'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com
+ 'gs_cdn_pc_web', # pc-web.stream.afreecatv.com
+ ]
+ _BAD_CDNS = [
+ 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
+ 'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400)
+ 'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve)
+ 'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve)
+ 'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400)
+ ]
+
+ def _extract_formats(self, channel_info, broadcast_no, aid):
+ stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
+
+ # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
+ default_cdn_ids = orderedSet([
+ *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
+ *self._WORKING_CDNS,
+ ])
+ cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
+
+ for attempt, cdn_id in enumerate(cdn_ids, start=1):
+ m3u8_url = traverse_obj(self._download_json(
+ urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
+ f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
+ fatal=False, query={
+ 'return_type': cdn_id,
+ 'broad_key': f'{broadcast_no}-common-master-hls',
+ }), ('view_url', {url_or_none}))
+ try:
+ return self._extract_m3u8_formats(
+ m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
+ headers={'Referer': 'https://play.afreecatv.com/'})
+ except ExtractorError as e:
+ if attempt == len(cdn_ids):
+ raise
+ self.report_warning(
+ f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
def _real_extract(self, url):
broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
@@ -294,7 +337,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
'This livestream is protected by a password, use the --video-password option',
expected=True)
- aid = self._download_json(
+ token_info = traverse_obj(self._download_json(
self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
'bno': broadcast_no,
@@ -302,18 +345,17 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
'type': 'aid',
'quality': 'master',
'pwd': password,
- })))['CHANNEL']['AID']
-
- stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
- stream_info = self._download_json(f'{stream_base_url}/broad_stream_assign.html', broadcast_no, query={
- # works: gs_cdn_pc_app, gs_cdn_mobile_web, gs_cdn_pc_web
- 'return_type': 'gs_cdn_pc_app',
- 'broad_key': f'{broadcast_no}-common-master-hls',
- }, note='Downloading metadata for stream', errnote='Unable to download metadata for stream')
-
- formats = self._extract_m3u8_formats(
- stream_info['view_url'], broadcast_no, 'mp4', m3u8_id='hls',
- query={'aid': aid}, headers={'Referer': url})
+ }))), ('CHANNEL', {dict})) or {}
+ aid = token_info.get('AID')
+ if not aid:
+ result = token_info.get('RESULT')
+ if result == 0:
+ raise ExtractorError('This livestream has ended', expected=True)
+ elif result == -6:
+ self.raise_login_required('This livestream is for subscribers only', method='password')
+ raise ExtractorError('Unable to extract access token')
+
+ formats = self._extract_formats(channel_info, broadcast_no, aid)
station_info = traverse_obj(self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
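
The new AfreecaTVLiveIE._extract_formats above replaces the single hard-coded gs_cdn_pc_app request with a failover loop: orderedSet() deduplicates the API-advertised CDN (filtered against _BAD_CDNS) ahead of the known-working list, the cdn extractor-arg can override the candidates, and each failing CDN logs a warning and moves on, re-raising only on the last attempt. The reworked token handling likewise maps the API's RESULT codes (0: stream ended, -6: subscribers only) to proper errors instead of a bare KeyError. A minimal standalone sketch of the same try-each-candidate pattern; the names here are illustrative, not yt-dlp API:

    def first_working(candidates, fetch, warn=print):
        """Return fetch(c) for the first candidate that succeeds; warn and
        continue on failure, re-raising only when no candidates remain."""
        for attempt, candidate in enumerate(candidates, start=1):
            try:
                return fetch(candidate)
            except Exception as e:
                if attempt == len(candidates):
                    raise
                warn(f'{e}. Retrying... (attempt {attempt} of {len(candidates)})')
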
diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py
index 0b73a96..6cc63cd 100644
--- a/yt_dlp/extractor/airtv.py
+++ b/yt_dlp/extractor/airtv.py
@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
- traverse_obj
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py
index 87219f2..49df4bf 100644
--- a/yt_dlp/extractor/allstar.py
+++ b/yt_dlp/extractor/allstar.py
@@ -12,7 +12,6 @@ from ..utils import (
)
from ..utils.traversal import traverse_obj
-
_FIELDS = '''
_id
clipImageSource
diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py
index 8d5b472..f927965 100644
--- a/yt_dlp/extractor/alphaporno.py
+++ b/yt_dlp/extractor/alphaporno.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
- parse_iso8601,
+ int_or_none,
parse_duration,
parse_filesize,
- int_or_none,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py
index bfe066b..cb2b989 100644
--- a/yt_dlp/extractor/alura.py
+++ b/yt_dlp/extractor/alura.py
@@ -1,17 +1,13 @@
import re
from .common import InfoExtractor
-
-from ..compat import (
- compat_urlparse,
-)
-
+from ..compat import compat_urlparse
from ..utils import (
+ ExtractorError,
+ clean_html,
+ int_or_none,
urlencode_postdata,
urljoin,
- int_or_none,
- clean_html,
- ExtractorError
)
@@ -39,7 +35,7 @@ class AluraIE(InfoExtractor):
def _real_extract(self, url):
- course, video_id = self._match_valid_url(url)
+ course, video_id = self._match_valid_url(url).group('course_name', 'id')
video_url = self._VIDEO_URL % (course, video_id)
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
@@ -52,7 +48,7 @@ class AluraIE(InfoExtractor):
formats = []
for video_obj in video_dict:
- video_url_m3u8 = video_obj.get('link')
+ video_url_m3u8 = video_obj.get('mp4')
video_format = self._extract_m3u8_formats(
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
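
The one-line _real_extract fix above addresses a TypeError: _match_valid_url() returns a re.Match object, which cannot be unpacked into two variables, whereas Match.group() called with several group names returns a tuple that can. Illustration (the URL pattern is made up for the example, not Alura's real _VALID_URL):

    import re

    m = re.match(r'(?P<course_name>[^/]+)/task/(?P<id>\d+)', 'python-oo/task/12345')
    course, video_id = m.group('course_name', 'id')  # tuple of both groups
    assert (course, video_id) == ('python-oo', '12345')
    # course, video_id = m  # TypeError: cannot unpack non-iterable re.Match object
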
diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py
index 5018710..509b21a 100644
--- a/yt_dlp/extractor/amara.py
+++ b/yt_dlp/extractor/amara.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
-from .youtube import YoutubeIE
from .vimeo import VimeoIE
+from .youtube import YoutubeIE
from ..utils import (
int_or_none,
parse_iso8601,
diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py
index 0d259c5..6b2bf2d 100644
--- a/yt_dlp/extractor/amp.py
+++ b/yt_dlp/extractor/amp.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py
index 52f2ad0..5e78f37 100644
--- a/yt_dlp/extractor/anchorfm.py
+++ b/yt_dlp/extractor/anchorfm.py
@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
- unified_timestamp
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py
index 306b365..9f5b9b5 100644
--- a/yt_dlp/extractor/angel.py
+++ b/yt_dlp/extractor/angel.py
@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
-from ..utils import url_or_none, merge_dicts
+from ..utils import merge_dicts, url_or_none
class AngelIE(InfoExtractor):
diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py
index d00b0f9..433eb4e 100644
--- a/yt_dlp/extractor/appleconnect.py
+++ b/yt_dlp/extractor/appleconnect.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- str_to_int,
- ExtractorError
-)
+from ..utils import ExtractorError, str_to_int
class AppleConnectIE(InfoExtractor):
diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py
index 2e0b0a8..21103ae 100644
--- a/yt_dlp/extractor/appletrailers.py
+++ b/yt_dlp/extractor/appletrailers.py
@@ -1,5 +1,5 @@
-import re
import json
+import re
from .common import InfoExtractor
from ..compat import compat_urlparse
diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py
index a493714..9a5524a 100644
--- a/yt_dlp/extractor/arnes.py
+++ b/yt_dlp/extractor/arnes.py
@@ -4,8 +4,8 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
- format_field,
float_or_none,
+ format_field,
int_or_none,
parse_iso8601,
remove_start,
diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index 1c180b1..46fe006 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -5,6 +5,7 @@ from ..utils import (
ExtractorError,
GeoRestrictedError,
int_or_none,
+ join_nonempty,
parse_iso8601,
parse_qs,
strip_or_none,
@@ -32,20 +33,6 @@ class ArteTVIE(ArteTVBaseIE):
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'only_matching': True,
}, {
- 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
- 'info_dict': {
- 'id': '100103-000-A',
- 'title': 'USA: Dyskryminacja na porodówce',
- 'description': 'md5:242017b7cce59ffae340a54baefcafb1',
- 'alt_title': 'ARTE Reportage',
- 'upload_date': '20201103',
- 'duration': 554,
- 'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
- 'timestamp': 1604417980,
- 'ext': 'mp4',
- },
- 'params': {'skip_download': 'm3u8'}
- }, {
'note': 'No alt_title',
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
'only_matching': True,
@@ -59,6 +46,23 @@ class ArteTVIE(ArteTVBaseIE):
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
'only_matching': True,
}, {
+ 'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
+ 'info_dict': {
+ 'id': '109067-000-A',
+ 'ext': 'mp4',
+ 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
+ 'timestamp': 1713927600,
+ 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
+ 'duration': 7599,
+ 'title': 'La loi de Téhéran',
+ 'upload_date': '20240424',
+ 'subtitles': {
+ 'fr': 'mincount:1',
+ 'fr-acc': 'mincount:1',
+ 'fr-forced': 'mincount:1',
+ },
+ },
+ }, {
'note': 'age-restricted',
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
'info_dict': {
@@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE):
'upload_date': '20230930',
'ext': 'mp4',
},
- }, {
- 'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
- 'info_dict': {
- 'id': '085374-003-A',
- 'ext': 'mp4',
- 'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
- 'timestamp': 1702872000,
- 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
- 'duration': 2594,
- 'title': 'Die kurze Zeit der Jugend',
- 'alt_title': 'Im hohen Norden geboren',
- 'upload_date': '20231218',
- 'subtitles': {
- 'fr': 'mincount:1',
- 'fr-acc': 'mincount:1',
- },
- },
+ 'skip': '404 Not Found',
}]
_GEO_BYPASS = True
@@ -143,16 +131,18 @@ class ArteTVIE(ArteTVBaseIE):
updated_subs = {}
for lang, sub_formats in subs.items():
for fmt in sub_formats:
- if fmt.get('url', '').endswith('-MAL.m3u8'):
- lang += '-acc'
- updated_subs.setdefault(lang, []).append(fmt)
+ url = fmt.get('url') or ''
+ suffix = ('acc' if url.endswith('-MAL.m3u8')
+ else 'forced' if '_VO' not in url
+ else None)
+ updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt)
return updated_subs
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
- langauge_code = self._LANG_MAP.get(lang)
+ language_code = self._LANG_MAP.get(lang)
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
'x-validated-age': '18'
@@ -180,10 +170,10 @@ class ArteTVIE(ArteTVBaseIE):
m = self._VERSION_CODE_RE.match(stream_version_code)
if m:
lang_pref = int(''.join('01'[x] for x in (
- m.group('vlang') == langauge_code, # we prefer voice in the requested language
+ m.group('vlang') == language_code, # we prefer voice in the requested language
not m.group('audio_desc'), # and not the audio description version
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
- m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
+ m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
not m.group('has_sub'), # but we prefer no subtitles otherwise
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
)))
diff --git a/yt_dlp/extractor/asobistage.py b/yt_dlp/extractor/asobistage.py
index b088a1b..8fa8f3e 100644
--- a/yt_dlp/extractor/asobistage.py
+++ b/yt_dlp/extractor/asobistage.py
@@ -105,7 +105,7 @@ class AsobiStageIE(InfoExtractor):
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
webpage = self._download_webpage(url, video_id)
event_data = traverse_obj(
- self._search_nextjs_data(webpage, video_id, default='{}'),
+ self._search_nextjs_data(webpage, video_id, default={}),
('props', 'pageProps', 'eventCMSData', {
'title': ('event_name', {str}),
'thumbnail': ('event_thumbnail_image', {url_or_none}),
diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py
index d60feba..20ee34c 100644
--- a/yt_dlp/extractor/atvat.py
+++ b/yt_dlp/extractor/atvat.py
@@ -2,10 +2,10 @@ import datetime as dt
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
float_or_none,
jwt_encode_hs256,
try_get,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py
index 6fc938d..a8dfb3e 100644
--- a/yt_dlp/extractor/awaan.py
+++ b/yt_dlp/extractor/awaan.py
@@ -2,8 +2,8 @@ import base64
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlencode,
compat_str,
+ compat_urllib_parse_urlencode,
)
from ..utils import (
format_field,
diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py
index 67af29a..c4e07a7 100644
--- a/yt_dlp/extractor/banbye.py
+++ b/yt_dlp/extractor/banbye.py
@@ -2,12 +2,12 @@ import math
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlparse,
compat_parse_qs,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
- format_field,
InAdvancePagedList,
+ format_field,
traverse_obj,
unified_timestamp,
)
diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py
index 51e7220..82dc9ab 100644
--- a/yt_dlp/extractor/bannedvideo.py
+++ b/yt_dlp/extractor/bannedvideo.py
@@ -2,11 +2,11 @@ import json
from .common import InfoExtractor
from ..utils import (
- try_get,
- int_or_none,
- url_or_none,
float_or_none,
+ int_or_none,
+ try_get,
unified_timestamp,
+ url_or_none,
)
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 015af9e..f6b58b3 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'url': 'http://www.bbc.com/news/world-europe-32668511',
'info_dict': {
'id': 'world-europe-32668511',
- 'title': 'Russia stages massive WW2 parade',
+ 'title': 'Russia stages massive WW2 parade despite Western boycott',
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
},
'playlist_count': 2,
@@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': '3662a707-0af9-3149-963f-47bea720b460',
'title': 'BUGGER',
+ 'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
},
'playlist_count': 18,
}, {
@@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': 'p02mprgb',
'ext': 'mp4',
- 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
- 'description': 'md5:2868290467291b37feda7863f7a83f54',
+ 'title': 'Germanwings crash site aerial video',
+ 'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
'duration': 47,
'timestamp': 1427219242,
'upload_date': '20150324',
+ 'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
},
'params': {
- # rtmp download
'skip_download': True,
}
}, {
@@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': 'now SIMORGH_DATA with no video',
}, {
# single video embedded with data-playable containing XML playlists (regional section)
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
'info_dict': {
- 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'id': '39275083',
+ 'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
'ext': 'mp4',
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
- 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
+ 'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
'timestamp': 1434713142,
'upload_date': '20150619',
+ 'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
},
'params': {
'skip_download': True,
- }
+ },
}, {
# single video from video playlist embedded with vxp-playlist-data JSON
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
@@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'params': {
'skip_download': True,
- }
+ },
+ 'skip': '404 Not Found',
}, {
- # single video story with digitalData
+ # single video story with __PWA_PRELOADED_STATE__
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
'info_dict': {
'id': 'p02q6gc4',
- 'ext': 'flv',
- 'title': 'Sri Lanka’s spicy secret',
- 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
- 'timestamp': 1437674293,
- 'upload_date': '20150723',
+ 'ext': 'mp4',
+ 'title': 'Tasting the spice of life in Jaffna',
+ 'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
+ 'timestamp': 1646058397,
+ 'upload_date': '20220228',
+ 'duration': 255,
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
}, {
# single video story without digitalData
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
@@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'timestamp': 1415867444,
'upload_date': '20141113',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
+ 'skip': 'redirects to TopGear home page',
}, {
# single video embedded with Morph
+ # TODO: replacement test page
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
'info_dict': {
'id': 'p041vhd0',
@@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader': 'BBC Sport',
'uploader_id': 'bbc_sport',
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Georestricted to UK',
+ 'skip': 'Video no longer in page',
}, {
- # single video with playlist.sxml URL in playlist param
+ # single video in __INITIAL_DATA__
'url': 'http://www.bbc.com/sport/0/football/33653409',
'info_dict': {
'id': 'p02xycnp',
'ext': 'mp4',
- 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
- 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
+ 'title': 'Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
+ 'timestamp': 1437750175,
+ 'upload_date': '20150724',
+ 'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
'duration': 140,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
}, {
- # article with multiple videos embedded with playlist.sxml in playlist param
+ # article with multiple videos embedded with Morph.setPayload
'url': 'http://www.bbc.com/sport/0/football/34475836',
'info_dict': {
'id': '34475836',
@@ -755,6 +751,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'playlist_count': 3,
}, {
+ # Testing noplaylist
+ 'url': 'http://www.bbc.com/sport/0/football/34475836',
+ 'info_dict': {
+ 'id': 'p034ppnv',
+ 'ext': 'mp4',
+ 'title': 'All you need to know about Jurgen Klopp',
+ 'timestamp': 1444335081,
+ 'upload_date': '20151008',
+ 'duration': 122.0,
+ 'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }, {
# school report article with single video
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
'info_dict': {
@@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'title': 'School which breaks down barriers in Jerusalem',
},
'playlist_count': 1,
+ 'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
}, {
# single video with playlist URL from weather section
'url': 'http://www.bbc.com/weather/features/33601775',
@@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1437785037,
'upload_date': '20150725',
+ 'duration': 105,
},
}, {
# video with window.__INITIAL_DATA__ and value as JSON string
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
'info_dict': {
- 'id': 'p0b71qth',
+ 'id': 'p0b779gc',
'ext': 'mp4',
'title': 'Why France is making this woman a national hero',
- 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+ 'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
'thumbnail': r're:https?://.+/.+\.jpg',
- 'timestamp': 1638230731,
- 'upload_date': '20211130',
+ 'timestamp': 1638215626,
+ 'upload_date': '20211129',
+ 'duration': 125,
+ },
+ }, {
+ # video with script id __NEXT_DATA__ and value as JSON string
+ 'url': 'https://www.bbc.com/news/uk-68546268',
+ 'info_dict': {
+ 'id': 'p0hj0lq7',
+ 'ext': 'mp4',
+ 'title': 'Nasser Hospital doctor describes his treatment by IDF',
+ 'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
+ 'timestamp': 1710188248,
+ 'upload_date': '20240311',
+ 'duration': 104,
},
}, {
# single video article embedded with data-media-vpid
@@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
+ 'skip': '404 Not Found',
}, {
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
'info_dict': {
@@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'ext': 'mp4',
'title': 'md5:2fabf12a726603193a2879a055f72514',
'description': 'Learn English words and phrases from this story',
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
},
'add_ie': [BBCCoUkIE.ie_key()],
}, {
@@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': 'p07c6sb9',
'ext': 'mp4',
- 'title': 'How positive thinking is harming your happiness',
- 'alt_title': 'The downsides of positive thinking',
- 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+ 'title': 'The downsides of positive thinking',
+ 'description': 'The downsides of positive thinking',
'duration': 235,
- 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
- 'upload_date': '20190604',
- 'categories': ['Psychology'],
+ 'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
+ 'upload_date': '20220223',
+ 'timestamp': 1645632746,
},
}, {
# BBC Sounds
- 'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+ 'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
'info_dict': {
- 'id': 'm001q789',
+ 'id': 'p0hrw4nr',
'ext': 'mp4',
- 'title': 'The Night Tracks Mix - Music for the darkling hour',
- 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
- 'chapters': 'count:8',
- 'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
- 'uploader': 'Radio 3',
- 'duration': 1800,
- 'uploader_id': 'bbc_radio_three',
- },
+ 'title': 'Are our coastlines being washed away?',
+ 'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
+ 'timestamp': 1713556800,
+ 'upload_date': '20240419',
+ 'duration': 1588,
+ 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
+ 'uploader': 'World Service',
+ 'uploader_id': 'bbc_world_service',
+ 'series': 'CrowdScience',
+ 'chapters': [],
+ }
}, { # onion routes
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
'only_matching': True,
@@ -1008,8 +1039,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
webpage, 'group id', default=None)
if group_id:
return self.url_result(
- 'https://www.bbc.co.uk/programmes/%s' % group_id,
- ie=BBCCoUkIE.ie_key())
+ f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex(
@@ -1069,83 +1099,133 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
}
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
- # There are several setPayload calls may be present but the video
- # seems to be always related to the first one
- morph_payload = self._parse_json(
- self._search_regex(
- r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
- webpage, 'morph payload', default='{}'),
- playlist_id, fatal=False)
+ # Several setPayload calls may be present but the video(s)
+ # should be in one that mentions leadMedia or videoData
+ morph_payload = self._search_json(
+ r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
+ contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
+ default={})
if morph_payload:
- components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
- for component in components:
- if not isinstance(component, dict):
- continue
- lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
- if not lead_media:
- continue
- identifiers = lead_media.get('identifiers')
- if not identifiers or not isinstance(identifiers, dict):
- continue
- programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
+ for lead_media in traverse_obj(morph_payload, (
+ 'body', 'components', ..., 'props', 'leadMedia', {dict})):
+ programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
if not programme_id:
continue
- title = lead_media.get('title') or self._og_search_title(webpage)
formats, subtitles = self._download_media_selector(programme_id)
- description = lead_media.get('summary')
- uploader = lead_media.get('masterBrand')
- uploader_id = lead_media.get('mid')
- duration = None
- duration_d = lead_media.get('duration')
- if isinstance(duration_d, dict):
- duration = parse_duration(dict_get(
- duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
return {
'id': programme_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
+ 'title': lead_media.get('title') or self._og_search_title(webpage),
+ **traverse_obj(lead_media, {
+ 'description': ('summary', {str}),
+ 'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
+ 'uploader': ('masterBrand', {str}),
+ 'uploader_id': ('mid', {str}),
+ }),
'formats': formats,
'subtitles': subtitles,
}
+ body = self._parse_json(traverse_obj(morph_payload, (
+ 'body', 'content', 'article', 'body')), playlist_id, fatal=False)
+ for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
+ if video_data.get('vpid'):
+ video_id = video_data['vpid']
+ formats, subtitles = self._download_media_selector(video_id)
+ entry = {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+ else:
+ video_id = video_data['pid']
+ entry = self.url_result(
+ f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
+ video_id, url_transparent=True)
+ entry.update({
+ 'timestamp': traverse_obj(morph_payload, (
+ 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
+ ),
+ **traverse_obj(video_data, {
+ 'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
+ 'title': (('title', 'caption'), {str}, any),
+ 'duration': ('duration', {parse_duration}),
+ }),
+ })
+ if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
+ return entry
+ entries.append(entry)
+ if entries:
+ playlist_title = traverse_obj(morph_payload, (
+ 'body', 'content', 'article', 'headline', {str})) or playlist_title
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
- preload_state = self._parse_json(self._search_regex(
- r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), playlist_id, fatal=False)
- if preload_state:
- current_programme = preload_state.get('programmes', {}).get('current') or {}
- programme_id = current_programme.get('id')
- if current_programme and programme_id and current_programme.get('type') == 'playable_item':
- title = current_programme.get('titles', {}).get('tertiary') or playlist_title
- formats, subtitles = self._download_media_selector(programme_id)
- synopses = current_programme.get('synopses') or {}
- network = current_programme.get('network') or {}
- duration = int_or_none(
- current_programme.get('duration', {}).get('value'))
- thumbnail = None
- image_url = current_programme.get('image_url')
- if image_url:
- thumbnail = image_url.replace('{recipe}', 'raw')
+ # various PRELOADED_STATE JSON
+ preload_state = self._search_json(
+ r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
+ 'preload state', playlist_id, transform_source=js_to_json, default={})
+ # PRELOADED_STATE with current programmme
PLACEHOLDER
+ current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
+ programme_id = traverse_obj(current_programme, ('id', {str}))
+ if programme_id and current_programme.get('type') == 'playable_item':
+ title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
+ formats, subtitles = self._download_media_selector(programme_id)
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'formats': formats,
+ **traverse_obj(current_programme, {
+ 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
+ 'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
+ 'duration': ('duration', 'value', {int_or_none}),
+ 'uploader': ('network', 'short_title', {str}),
+ 'uploader_id': ('network', 'id', {str}),
+ 'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
+ 'series': ('titles', 'primary', {str}),
+ }),
+ 'subtitles': subtitles,
+ 'chapters': traverse_obj(preload_state, (
+ 'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
+ 'title': ('titles', {lambda x: join_nonempty(
+ 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+ 'start_time': ('offset', 'start', {float_or_none}),
+ 'end_time': ('offset', 'end', {float_or_none}),
+ })
+ ),
+ }
+
+ # PWA_PRELOADED_STATE with article video asset
+ asset_id = traverse_obj(preload_state, (
+ 'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
+ 'assetVideo', 0, {str}, any))
+ if asset_id:
+ video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
+ if video_id:
+ article = traverse_obj(preload_state, (
+ 'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
+
+ def image_url(image_id):
+ return traverse_obj(preload_state, (
+ 'entities', 'images', image_id, 'url',
+ {lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
+
+ formats, subtitles = self._download_media_selector(video_id)
return {
- 'id': programme_id,
- 'title': title,
- 'description': dict_get(synopses, ('long', 'medium', 'short')),
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'uploader': network.get('short_title'),
- 'uploader_id': network.get('id'),
+ 'id': video_id,
+ **traverse_obj(preload_state, ('entities', 'videos', asset_id, {
+ 'title': ('title', {str}),
+ 'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
+ 'thumbnail': (0, {image_url}),
+ 'duration': ('duration', {int_or_none}),
+ })),
'formats': formats,
'subtitles': subtitles,
- 'chapters': traverse_obj(preload_state, (
- 'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
- 'title': ('titles', {lambda x: join_nonempty(
- 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
- 'start_time': ('offset', 'start', {float_or_none}),
- 'end_time': ('offset', 'end', {float_or_none}),
- })) or None,
+ 'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
}
+ else:
+ return self.url_result(
+ f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
+ asset_id, playlist_title, display_id=playlist_id,
+ description=playlist_description)
bbc3_config = self._parse_json(
self._search_regex(
@@ -1191,6 +1271,28 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
+ def parse_model(model):
+ """Extract single video from model structure"""
+ item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
+ if not item_id:
+ return
+ formats, subtitles = self._download_media_selector(item_id)
+ return {
+ 'id': item_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **traverse_obj(model, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
+ 'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
+ 'duration': ('versions', 0, 'duration', {int}),
+ 'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
+ })
+ }
+
+ def is_type(*types):
+ return lambda _, v: v['type'] in types
+
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
'quoted preload state', default=None)
@@ -1202,6 +1304,19 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
if initial_data:
+ for video_data in traverse_obj(initial_data, (
+ 'stores', 'article', 'articleBodyContent', is_type('video'))):
+ model = traverse_obj(video_data, (
+ 'model', 'blocks', is_type('aresMedia'),
+ 'model', 'blocks', is_type('aresMediaMetadata'),
+ 'model', {dict}, any))
+ entry = parse_model(model)
+ if entry:
+ entries.append(entry)
+ if entries:
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
def parse_media(media):
if not media:
return
@@ -1234,27 +1349,90 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'subtitles': subtitles,
'timestamp': item_time,
'description': strip_or_none(item_desc),
+ 'duration': int_or_none(item.get('duration')),
})
- for resp in (initial_data.get('data') or {}).values():
- name = resp.get('name')
+
+ for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
+ name = resp['name']
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
- for block in (try_get(resp,
- (lambda x: x['data']['blocks'],
- lambda x: x['data']['content']['model']['blocks'],),
- list) or []):
- if block.get('type') not in ['media', 'video']:
- continue
- parse_media(block.get('model'))
+ for block in traverse_obj(resp, (
+ 'data', (None, ('content', 'model')), 'blocks',
+ is_type('media', 'video'), 'model', {dict})):
+ parse_media(block)
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
+ # extract from SIMORGH_DATA hydration JSON
+ simorgh_data = self._search_json(
+ r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
+ 'simorgh data', playlist_id, default={})
+ if simorgh_data:
+ done = False
+ for video_data in traverse_obj(simorgh_data, (
+ 'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
+ model = traverse_obj(video_data, (
+ 'model', 'blocks', is_type('aresMedia'),
+ 'model', 'blocks', is_type('aresMediaMetadata'),
+ 'model', {dict}, any))
+ if video_data['type'] == 'video':
+ entry = parse_model(model)
+ else: # legacyMedia: no duration, subtitles
+ block_id, entry = traverse_obj(model, ('blockId', {str})), None
+ media_data = traverse_obj(simorgh_data, (
+ 'pageData', 'promo', 'media',
+ {lambda x: x if x['id'] == block_id else None}))
+ formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
+ 'url': ('url', {url_or_none}),
+ 'ext': ('format', {str}),
+ 'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
+ }))
+ if formats:
+ entry = {
+ 'id': block_id,
+ 'display_id': playlist_id,
+ 'formats': formats,
+ 'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
+ **traverse_obj(model, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
+ 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
+ 'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
+ }),
+ }
+ done = True
+ if entry:
+ entries.append(entry)
+ if done:
+ break
+ if entries:
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),
re.findall(pattern, webpage))))
+ # US accessed article with single embedded video (e.g.
+ # https://www.bbc.com/news/uk-68546268)
+ next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
+ ('props', 'pageProps', 'page'))
+ model = traverse_obj(next_data, (
+ ..., 'contents', is_type('video'),
+ 'model', 'blocks', is_type('media'),
+ 'model', 'blocks', is_type('mediaMetadata'),
+ 'model', {dict}, any))
+ if model and (entry := parse_model(model)):
+ if not entry.get('timestamp'):
+ entry['timestamp'] = traverse_obj(next_data, (
+ ..., 'contents', is_type('timestamp'), 'model',
+ 'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
+ entries.append(entry)
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
# Multiple video article (e.g.
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
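
A note on the `is_type` helper introduced above: it builds a `(key, value)` predicate for `traverse_obj`, so typed content blocks can be filtered inline during traversal. A minimal stand-alone sketch of the idea (this is not yt-dlp's actual `traverse_obj`; it also assumes every block carries a 'type' key, whereas the real traversal simply skips items whose predicate raises):

def is_type(*types):
    # same shape as the helper above: ignore the key, match the value's type
    return lambda _, v: v['type'] in types

def filter_blocks(blocks, predicate):
    # hypothetical stand-in for the (index, value) filtering traverse_obj performs
    return [v for k, v in enumerate(blocks) if predicate(k, v)]

blocks = [
    {'type': 'video', 'id': 'v1'},
    {'type': 'text', 'id': 't1'},
    {'type': 'legacyMedia', 'id': 'm1'},
]
print(filter_blocks(blocks, is_type('video', 'legacyMedia')))
# [{'type': 'video', 'id': 'v1'}, {'type': 'legacyMedia', 'id': 'm1'}]
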
diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py
index 042b322..da98ac3 100644
--- a/yt_dlp/extractor/beeg.py
+++ b/yt_dlp/extractor/beeg.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
str_or_none,
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index fee4b29..b38c90b 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor):
return formats
- def _download_playinfo(self, video_id, cid):
+ def _download_playinfo(self, video_id, cid, headers=None):
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
- note=f'Downloading video formats for cid {cid}')['data']
+ note=f'Downloading video formats for cid {cid}', headers=headers)['data']
def json2srt(self, json_data):
srt_data = ''
@@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage, urlh = self._download_webpage_handle(url, video_id)
+ headers = self.geo_verification_headers()
+ webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
@@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE):
self._download_json(
'https://api.bilibili.com/x/player/pagelist', video_id,
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
- note='Extracting videos in anthology'),
+ note='Extracting videos in anthology', headers=headers),
'data', expected_type=list) or []
is_anthology = len(page_list_json) > 1
@@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
festival_info = {}
if is_festival:
- play_info = self._download_playinfo(video_id, cid)
+ play_info = self._download_playinfo(video_id, cid, headers=headers)
festival_info = traverse_obj(initial_state, {
'uploader': ('videoInfo', 'upName'),
@@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
def _real_extract(self, url):
episode_id = self._match_id(url)
- webpage = self._download_webpage(url, episode_id)
+ headers = self.geo_verification_headers()
+ webpage = self._download_webpage(url, episode_id, headers=headers)
if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted')
elif '正在观看预览,大会员免费看全片' in webpage:
self.raise_login_required('This video is for premium members only')
- headers = {'Referer': url, **self.geo_verification_headers()}
+ headers['Referer'] = url
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
@@ -724,7 +726,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
'__post_extractor': self.extract_comments(aid),
- 'http_headers': headers,
+ 'http_headers': {'Referer': url},
}
@@ -1043,15 +1045,17 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
try:
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
- playlist_id, note=f'Downloading page {page_idx}', query=query)
+ playlist_id, note=f'Downloading page {page_idx}', query=query,
+ headers={'referer': url})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise
- if response['code'] == -401:
+ if response['code'] in (-352, -401):
raise ExtractorError(
- 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+ f'Request is blocked by server ({-response["code"]}), '
+ 'please add cookies, wait and try later.', expected=True)
return response['data']
def get_metadata(page_data):
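
The bilibili changes above all follow one pattern: build the geo-verification headers once and thread the same dict through the webpage fetch and every follow-up API call, so all requests present a consistent apparent region. A rough sketch, assuming `geo_verification_headers()` yields an `X-Forwarded-For` spoof when geo bypass is enabled (the fetch function and all values below are placeholders):

def fetch(url, headers):
    # placeholder for self._download_webpage / self._download_json
    return f'GET {url} with {sorted(headers)}'

headers = {'X-Forwarded-For': '203.0.113.7'}  # dummy spoofed client IP
print(fetch('https://api.bilibili.com/x/player/pagelist', headers))
# the Bangumi fix then adds the Referer to the same shared dict:
headers['Referer'] = 'https://www.bilibili.com/bangumi/play/ep1'  # hypothetical URL
print(fetch('https://api.bilibili.com/pgc/player/web/v2/playurl', headers))
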
diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py
index e875957..aa3d63e 100644
--- a/yt_dlp/extractor/bleacherreport.py
+++ b/yt_dlp/extractor/bleacherreport.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .amp import AMPIE
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py
index 3d6e033..ef0151d 100644
--- a/yt_dlp/extractor/blogger.py
+++ b/yt_dlp/extractor/blogger.py
@@ -1,3 +1,4 @@
+from .common import InfoExtractor
from ..utils import (
mimetype2ext,
parse_duration,
@@ -5,7 +6,6 @@ from ..utils import (
str_or_none,
traverse_obj,
)
-from .common import InfoExtractor
class BloggerIE(InfoExtractor):
diff --git a/yt_dlp/extractor/boosty.py b/yt_dlp/extractor/boosty.py
index fb14ca1..d3aab7a 100644
--- a/yt_dlp/extractor/boosty.py
+++ b/yt_dlp/extractor/boosty.py
@@ -1,7 +1,11 @@
+import json
+import urllib.parse
+
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
+ bug_reports_message,
int_or_none,
qualities,
str_or_none,
@@ -162,9 +166,19 @@ class BoostyIE(InfoExtractor):
def _real_extract(self, url):
user, post_id = self._match_valid_url(url).group('user', 'post_id')
+
+ auth_headers = {}
+ auth_cookie = self._get_cookies('https://boosty.to/').get('auth')
+ if auth_cookie is not None:
+ try:
+ auth_data = json.loads(urllib.parse.unquote(auth_cookie.value))
+ auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
+ except (json.JSONDecodeError, KeyError):
+ self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}')
+
post = self._download_json(
f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
- note='Downloading post data', errnote='Unable to download post data')
+ note='Downloading post data', errnote='Unable to download post data', headers=auth_headers)
post_title = post.get('title')
if not post_title:
@@ -202,7 +216,9 @@ class BoostyIE(InfoExtractor):
'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
}, get_all=False)})
- if not entries:
+ if not entries and not post.get('hasAccess'):
+ self.raise_login_required('This post requires a subscription', metadata_available=True)
+ elif not entries:
raise ExtractorError('No videos found', expected=True)
if len(entries) == 1:
return entries[0]
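
The boosty login support above hinges on one detail: the `auth` cookie on boosty.to holds URL-encoded JSON, and the bearer token sits under its `accessToken` key. A self-contained sketch of that parsing, with a made-up cookie value:

import json
import urllib.parse

# fabricate a cookie value shaped like boosty's (the token is a dummy)
cookie_value = urllib.parse.quote(json.dumps({'accessToken': 'abc123'}))

try:
    auth_data = json.loads(urllib.parse.unquote(cookie_value))
    headers = {'Authorization': f'Bearer {auth_data["accessToken"]}'}
except (json.JSONDecodeError, KeyError):
    headers = {}  # fall back to anonymous access, as the extractor does
print(headers)  # {'Authorization': 'Bearer abc123'}
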
diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py
index 92f8ea2..2675866 100644
--- a/yt_dlp/extractor/bostonglobe.py
+++ b/yt_dlp/extractor/bostonglobe.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
extract_attributes,
)
diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py
index 51f9eb7..da06cc3 100644
--- a/yt_dlp/extractor/boxcast.py
+++ b/yt_dlp/extractor/boxcast.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- js_to_json,
- traverse_obj,
- unified_timestamp
-)
+from ..utils import js_to_json, traverse_obj, unified_timestamp
class BoxCastVideoIE(InfoExtractor):
diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py
index 1200437..04b1dd8 100644
--- a/yt_dlp/extractor/brainpop.py
+++ b/yt_dlp/extractor/brainpop.py
@@ -6,7 +6,7 @@ from ..utils import (
classproperty,
int_or_none,
traverse_obj,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 61b1841..4190e1a 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -12,10 +12,11 @@ from ..compat import (
)
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
+ UnsupportedError,
clean_html,
dict_get,
extract_attributes,
- ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
@@ -29,7 +30,6 @@ from ..utils import (
try_get,
unescapeHTML,
unsmuggle_url,
- UnsupportedError,
update_url_query,
url_or_none,
)
diff --git a/yt_dlp/extractor/brilliantpala.py b/yt_dlp/extractor/brilliantpala.py
index 0bf8622..950a70a 100644
--- a/yt_dlp/extractor/brilliantpala.py
+++ b/yt_dlp/extractor/brilliantpala.py
@@ -27,8 +27,17 @@ class BrilliantpalaBaseIE(InfoExtractor):
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
def _perform_login(self, username, password):
- login_form = self._hidden_inputs(self._download_webpage(
- self._LOGIN_API, None, 'Downloading login page'))
+ login_page, urlh = self._download_webpage_handle(
+ self._LOGIN_API, None, 'Downloading login page', expected_status=401)
+ if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API):
+ self.write_debug('Cookies are valid, no login required.')
+ return
+
+ if urlh.status == 401:
+ self.write_debug('Got HTTP Error 401; cookies have been invalidated')
+ login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
login_form.update({
'username': username,
'password': password,
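
The brilliantpala login rework probes the login endpoint to decide whether stored cookies still work: a non-401 response that redirected away from the login API means the session is valid, while a 401 means the cookies were invalidated and the login page must be re-fetched. A small sketch of that decision (URLs are illustrative):

def needs_login(status, final_url, login_api='https://example.com/login'):
    if status != 401 and not final_url.startswith(login_api):
        return False  # cookies valid, already authenticated
    return True       # 401 (cookies invalidated) or still on the login page

print(needs_login(200, 'https://example.com/dashboard'))  # False
print(needs_login(401, 'https://example.com/login'))      # True
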
diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py
deleted file mode 100644
index 4a22141..0000000
--- a/yt_dlp/extractor/cableav.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from .common import InfoExtractor
-
-
-class CableAVIE(InfoExtractor):
- _VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
- _TESTS = [{
- 'url': 'https://cableav.tv/lS4iR9lWjN8/',
- 'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
- 'info_dict': {
- 'id': 'lS4iR9lWjN8',
- 'ext': 'mp4',
- 'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
- 'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._og_search_video_url(webpage, secure=False)
-
- formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'formats': formats,
- }
diff --git a/yt_dlp/extractor/caffeinetv.py b/yt_dlp/extractor/caffeinetv.py
new file mode 100644
index 0000000..aa107f8
--- /dev/null
+++ b/yt_dlp/extractor/caffeinetv.py
@@ -0,0 +1,74 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ urljoin,
+)
+
+
+class CaffeineTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
+ _TESTS = [{
+ 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
+ 'info_dict': {
+ 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
+ 'ext': 'mp4',
+ 'title': 'GOOOOD MORNINNNNN #highlights',
+ 'timestamp': 1654702180,
+ 'upload_date': '20220608',
+ 'uploader': 'RahJON Wicc',
+ 'uploader_id': 'TsuSurf',
+ 'duration': 3145,
+ 'age_limit': 17,
+ 'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'tags': ['highlights', 'battlerap'],
+ },
+ 'params': {
+ 'skip_download': 'm3u8',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
+ broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {}
+
+ video_url = broadcast_info['video_url']
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
+ else:
+ formats = [{'url': video_url}]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(json_data, {
+ 'like_count': ('like_count', {int_or_none}),
+ 'view_count': ('view_count', {int_or_none}),
+ 'comment_count': ('comment_count', {int_or_none}),
+ 'tags': ('tags', ..., {str}, {lambda x: x or None}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_id': (((None, 'user'), 'username'), {str}, any),
+ 'is_live': ('is_live', {bool}),
+ }),
+ **traverse_obj(broadcast_info, {
+ 'title': ('broadcast_title', {str}),
+ 'duration': ('content_duration', {int_or_none}),
+ 'timestamp': ('broadcast_start_time', {parse_iso8601}),
+ 'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
+ }),
+ 'age_limit': {
+            # assume App Store age ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
+ 'FOUR_PLUS': 0,
+ 'NINE_PLUS': 9,
+ 'TWELVE_PLUS': 12,
+ 'SEVENTEEN_PLUS': 17,
+ }.get(broadcast_info.get('content_rating'), 17),
+ }
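
The age-limit mapping at the end of the new extractor leans on `dict.get` with a default, so unknown or missing content ratings deliberately fall into the strictest bucket. The same pattern in isolation:

RATING_TO_AGE = {
    'FOUR_PLUS': 0,
    'NINE_PLUS': 9,
    'TWELVE_PLUS': 12,
    'SEVENTEEN_PLUS': 17,
}

def age_limit(content_rating):
    return RATING_TO_AGE.get(content_rating, 17)

print(age_limit('NINE_PLUS'))  # 9
print(age_limit(None))         # 17 -- safe default for unrated content
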
diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py
index df5ca58..745e695 100644
--- a/yt_dlp/extractor/canalalpha.py
+++ b/yt_dlp/extractor/canalalpha.py
@@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor):
'id': '24484',
'ext': 'mp4',
'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
- 'description': 'md5:3de3f151180684621e85be7c10e4e613',
+ 'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
'upload_date': '20211026',
'duration': 360,
@@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor):
'duration': 360,
},
'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
+ 'info_dict': {
+ 'id': '33500',
+ 'ext': 'mp4',
+ 'title': 'Encore des mesures d\'économie dans le Jura',
+ 'description': 'md5:938b5b556592f2d1b9ab150268082a80',
+ 'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
+ 'upload_date': '20240411',
+ 'duration': 105,
+ },
}]
def _real_extract(self, url):
- id = self._match_id(url)
- webpage = self._download_webpage(url, id)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
data_json = self._parse_json(self._search_regex(
r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
- webpage, 'data_json'), id)['1']['data']['data']
+ webpage, 'data_json'), video_id)['1']['data']['data']
manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
subtitles = {}
formats = [{
@@ -75,15 +86,17 @@ class CanalAlphaIE(InfoExtractor):
'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
} for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
if manifests.get('hls'):
- m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
- formats.extend(m3u8_frmts)
- subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ manifests['hls'], video_id, m3u8_id='hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
if manifests.get('dash'):
- dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
- formats.extend(dash_frmts)
- subtitles = self._merge_subtitles(subtitles, dash_subs)
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ manifests['dash'], video_id, mpd_id='dash', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
return {
- 'id': id,
+ 'id': video_id,
'title': data_json.get('title').strip(),
'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
'thumbnail': data_json.get('poster'),
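
The canalalpha fix above switches from reassigning the subtitles dict to merging each manifest's subtitles into a single target via `self._merge_subtitles(subs, target=subtitles)`. A minimal sketch of the merge semantics (not yt-dlp's actual implementation):

def merge_subtitles(subs, target):
    # accumulate tracks per language instead of replacing the dict
    for lang, tracks in subs.items():
        target.setdefault(lang, []).extend(tracks)
    return target

subtitles = {}
merge_subtitles({'fr': [{'url': 'https://example.com/hls.vtt'}]}, target=subtitles)
merge_subtitles({'fr': [{'url': 'https://example.com/dash.vtt'}]}, target=subtitles)
print(subtitles)  # {'fr': [{'url': ...hls.vtt}, {'url': ...dash.vtt}]}
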
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index ff320dd..a418026 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -151,7 +151,7 @@ class CBCIE(InfoExtractor):
class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player'
- _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
+ _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
@@ -278,6 +278,28 @@ class CBCPlayerIE(InfoExtractor):
'media_type': 'Full Program',
},
}, {
+ 'url': 'https://www.cbc.ca/player/play/video/1.7194274',
+ 'md5': '188b96cf6bdcb2540e178a6caa957128',
+ 'info_dict': {
+ 'id': '2334524995812',
+ 'ext': 'mp4',
+ 'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
+ 'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
+ 'timestamp': 1714788791,
+ 'duration': 77.678,
+ 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
+ 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
+ 'uploader': 'CBCC-NEW',
+ 'chapters': 'count:0',
+ 'upload_date': '20240504',
+ 'categories': 'count:3',
+ 'series': 'The National',
+ 'tags': 'count:15',
+ 'creators': ['encoder'],
+ 'location': 'Canada',
+ 'media_type': 'Excerpt',
+ },
+ }, {
'url': 'cbcplayer:1.7159484',
'only_matching': True,
}, {
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py
index cf83021..aca9782 100644
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -5,14 +5,14 @@ from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
extract_attributes,
+ find_xpath_attr,
get_element_html_by_id,
int_or_none,
- find_xpath_attr,
smuggle_url,
- xpath_element,
- xpath_text,
update_url_query,
url_or_none,
+ xpath_element,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py
index 90b4d08..0a5a524 100644
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@@ -16,7 +16,6 @@ from ..utils import (
merge_dicts,
multipart_encode,
parse_duration,
- random_birthday,
traverse_obj,
try_call,
try_get,
@@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'crash404',
- 'view_count': int,
'average_rating': float,
'duration': 137,
'age_limit': 0,
+ 'upload_date': '20160220',
+ 'timestamp': 1455968218,
}
}, {
- # Age-restricted
- 'url': 'http://www.cda.pl/video/1273454c4',
+ # Age-restricted with vfilm redirection
+ 'url': 'https://www.cda.pl/video/8753244c4',
+ 'md5': 'd8eeb83d63611289507010d3df3bb8b3',
'info_dict': {
- 'id': '1273454c4',
+ 'id': '8753244c4',
'ext': 'mp4',
- 'title': 'Bronson (2008) napisy HD 1080p',
- 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+ 'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
+ 'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
'height': 1080,
- 'uploader': 'boniek61',
+ 'uploader': 'arhn eu',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 5554,
+ 'duration': 991,
'age_limit': 18,
- 'view_count': int,
'average_rating': float,
- },
+ 'timestamp': 1633888264,
+ 'upload_date': '20211010',
+ }
+ }, {
+ # Age-restricted without vfilm redirection
+ 'url': 'https://www.cda.pl/video/17028157b8',
+ 'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
+ 'info_dict': {
+ 'id': '17028157b8',
+ 'ext': 'mp4',
+ 'title': 'STENDUPY MICHAŁ OGIŃSKI',
+ 'description': 'md5:5851f3272bfc31f762d616040a1d609a',
+ 'height': 480,
+ 'uploader': 'oginski',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 18855,
+ 'age_limit': 18,
+ 'average_rating': float,
+ 'timestamp': 1699705901,
+ 'upload_date': '20231111',
+ }
}, {
'url': 'http://ebd.cda.pl/0x0/5749950c',
'only_matching': True,
}]
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
- form_data = random_birthday('rok', 'miesiac', 'dzien')
- form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
- data, content_type = multipart_encode(form_data)
+ data, content_type = multipart_encode({'age_confirm': ''})
return self._download_webpage(
- urljoin(url, '/a/validatebirth'), video_id, *args,
+ url, video_id, *args,
data=data, headers={
'Referer': url,
'Content-Type': content_type,
@@ -164,7 +182,7 @@ class CDAIE(InfoExtractor):
if 'Authorization' in self._API_HEADERS:
return self._api_extract(video_id)
else:
- return self._web_extract(video_id, url)
+ return self._web_extract(video_id)
def _api_extract(self, video_id):
meta = self._download_json(
@@ -197,9 +215,9 @@ class CDAIE(InfoExtractor):
'view_count': meta.get('views'),
}
- def _web_extract(self, video_id, url):
+ def _web_extract(self, video_id):
self._set_cookie('cda.pl', 'cda.player', 'html5')
- webpage = self._download_webpage(
+ webpage, urlh = self._download_webpage_handle(
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
@@ -209,10 +227,10 @@ class CDAIE(InfoExtractor):
self.raise_geo_restricted()
need_confirm_age = False
- if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
+ if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
webpage, 'birthday validate form', default=None):
webpage = self._download_age_confirm_page(
- url, video_id, note='Confirming age')
+ urlh.url, video_id, note='Confirming age')
need_confirm_age = True
formats = []
@@ -222,9 +240,6 @@ class CDAIE(InfoExtractor):
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
''', webpage, 'uploader', default=None, group='uploader')
- view_count = self._search_regex(
- r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
- 'view_count', default=None)
average_rating = self._search_regex(
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
@@ -235,7 +250,6 @@ class CDAIE(InfoExtractor):
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'uploader': uploader,
- 'view_count': int_or_none(view_count),
'average_rating': float_or_none(average_rating),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
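
The cda.pl age-gate change drops the fake-birthday POST to /a/validatebirth: confirming age is now a single empty `age_confirm` form field POSTed back to the (possibly redirected) video URL itself. A simplified stand-in for yt-dlp's `multipart_encode` showing the payload shape:

import uuid

def multipart_encode(fields):
    # simplified sketch, not yt-dlp's actual helper
    boundary = uuid.uuid4().hex
    body = b''
    for name, value in fields.items():
        body += (f'--{boundary}\r\n'
                 f'Content-Disposition: form-data; name="{name}"\r\n\r\n'
                 f'{value}\r\n').encode()
    body += f'--{boundary}--\r\n'.encode()
    return body, f'multipart/form-data; boundary={boundary}'

data, content_type = multipart_encode({'age_confirm': ''})
print(content_type)  # multipart/form-data; boundary=...
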
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index 156b6a3..5d63357 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -101,7 +101,7 @@ class CeskaTelevizeIE(InfoExtractor):
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title:
- playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
+ playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0]
playlist_description = self._og_search_description(webpage, default=None)
if playlist_description:
playlist_description = playlist_description.replace('\xa0', ' ')
diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py
index 9cffa11..745b71f 100644
--- a/yt_dlp/extractor/cinetecamilano.py
+++ b/yt_dlp/extractor/cinetecamilano.py
@@ -1,4 +1,5 @@
import json
+
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py
index 006a713..67b56e0 100644
--- a/yt_dlp/extractor/clippit.py
+++ b/yt_dlp/extractor/clippit.py
@@ -1,11 +1,11 @@
+import re
+
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
qualities,
)
-import re
-
class ClippitIE(InfoExtractor):
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 57bbf9b..b99b7e5 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,5 +1,6 @@
import base64
import collections
+import functools
import getpass
import hashlib
import http.client
@@ -21,7 +22,6 @@ import urllib.parse
import urllib.request
import xml.etree.ElementTree
-from ..compat import functools # isort: split
from ..compat import (
compat_etree_fromstring,
compat_expanduser,
@@ -957,7 +957,8 @@ class InfoExtractor:
if urlh is False:
assert not fatal
return False
- content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
+ encoding=encoding, data=data)
return (content, urlh)
@staticmethod
@@ -1005,8 +1006,10 @@ class InfoExtractor:
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
- def _request_dump_filename(self, url, video_id):
- basen = f'{video_id}_{url}'
+ def _request_dump_filename(self, url, video_id, data=None):
+ if data is not None:
+ data = hashlib.md5(data).hexdigest()
+ basen = join_nonempty(video_id, data, url, delim='_')
trim_length = self.get_param('trim_file_name') or 240
if len(basen) > trim_length:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@@ -1028,7 +1031,8 @@ class InfoExtractor:
except LookupError:
return webpage_bytes.decode('utf-8', 'replace')
- def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
+ prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read()
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
@@ -1037,7 +1041,9 @@ class InfoExtractor:
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self.get_param('write_pages'):
- filename = self._request_dump_filename(urlh.url, video_id)
+ if isinstance(url_or_request, Request):
+ data = self._create_request(url_or_request, data).data
+ filename = self._request_dump_filename(urlh.url, video_id, data)
self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
@@ -1098,7 +1104,7 @@ class InfoExtractor:
impersonate=None, require_impersonation=False):
if self.get_param('load_pages'):
url_or_request = self._create_request(url_or_request, data, headers, query)
- filename = self._request_dump_filename(url_or_request.url, video_id)
+ filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
self.to_screen(f'Loading request from {filename}')
try:
with open(filename, 'rb') as dumpf:
@@ -1738,12 +1744,16 @@ class InfoExtractor:
traverse_json_ld(json_ld)
return filter_dict(info)
- def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
- return self._parse_json(
- self._search_regex(
- r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
- webpage, 'next.js data', fatal=fatal, **kw),
- video_id, transform_source=transform_source, fatal=fatal)
+ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+ if default == '{}':
+ self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+ default = {}
+ if default is not NO_DEFAULT:
+ fatal = False
+
+ return self._search_json(
+ r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+ video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
@@ -3374,23 +3384,16 @@ class InfoExtractor:
return formats
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
- mobj = re.search(
- r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
- webpage)
- if mobj:
- try:
- jwplayer_data = self._parse_json(mobj.group('options'),
- video_id=video_id,
- transform_source=transform_source)
- except ExtractorError:
- pass
- else:
- if isinstance(jwplayer_data, dict):
- return jwplayer_data
-
- def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+ return self._search_json(
+ r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
+ webpage, 'JWPlayer data', video_id,
+            # must be a {...} or a sequence of {...}s, ending in ')' (setup) or ']' (load)
+ contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
+ transform_source=transform_source, default=None)
+
+ def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
jwplayer_data = self._find_jwplayer_data(
- webpage, video_id, transform_source=js_to_json)
+ webpage, video_id, transform_source=transform_source)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
@@ -3422,22 +3425,14 @@ class InfoExtractor:
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
subtitles = {}
- tracks = video_data.get('tracks')
- if tracks and isinstance(tracks, list):
- for track in tracks:
- if not isinstance(track, dict):
- continue
- track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, str):
- continue
- if track_kind.lower() not in ('captions', 'subtitles'):
- continue
- track_url = urljoin(base_url, track.get('file'))
- if not track_url:
- continue
- subtitles.setdefault(track.get('label') or 'en', []).append({
- 'url': self._proto_relative_url(track_url)
- })
+ for track in traverse_obj(video_data, (
+ 'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
entry = {
'id': this_video_id,
@@ -3522,7 +3517,7 @@ class InfoExtractor:
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
# of jwplayer.flash.swf
rtmp_url_parts = re.split(
- r'((?:mp4|mp3|flv):)', source_url, 1)
+ r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
if len(rtmp_url_parts) == 3:
rtmp_url, prefix, play_path = rtmp_url_parts
a_format.update({
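
Among the common.py changes, `_request_dump_filename` now mixes an MD5 of the request body into the dump filename, so two POSTs to the same URL with different payloads no longer overwrite each other's --write-pages dumps, and --load-pages can find the right one again. A rough sketch of the naming scheme (sanitization and length trimming are simplified here):

import hashlib

def request_dump_filename(url, video_id, data=None):
    parts = [video_id]
    if data is not None:
        parts.append(hashlib.md5(data).hexdigest())
    parts.append(url)
    # the real code uses join_nonempty(..., delim='_') plus trimming via get_param('trim_file_name')
    return '_'.join(parts).replace('/', '_') + '.dump'

print(request_dump_filename('https://api.example.com/q', 'vid123', b'{"page": 1}'))
print(request_dump_filename('https://api.example.com/q', 'vid123', b'{"page": 2}'))
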
diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py
index 1d3b61c..4514424 100644
--- a/yt_dlp/extractor/commonmistakes.py
+++ b/yt_dlp/extractor/commonmistakes.py
@@ -40,3 +40,19 @@ class UnicodeBOMIE(InfoExtractor):
'Your URL starts with a Byte Order Mark (BOM). '
'Removing the BOM and looking for "%s" ...' % real_url)
return self.url_result(real_url)
+
+
+class BlobIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'blob:'
+
+ _TESTS = [{
+ 'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ 'You\'ve asked yt-dlp to download a blob URL. '
+ 'A blob URL exists only locally in your browser. '
+ 'It is not possible for yt-dlp to access it.', expected=True)
diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py
index bcc34dd..0a98c98 100644
--- a/yt_dlp/extractor/corus.py
+++ b/yt_dlp/extractor/corus.py
@@ -1,7 +1,7 @@
from .theplatform import ThePlatformFeedIE
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
float_or_none,
int_or_none,
)
diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py
index 1ef90b5..0cb7d94 100644
--- a/yt_dlp/extractor/crackle.py
+++ b/yt_dlp/extractor/crackle.py
@@ -6,6 +6,7 @@ import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
determine_ext,
float_or_none,
int_or_none,
@@ -13,7 +14,6 @@ from ..utils import (
parse_age_limit,
parse_duration,
url_or_none,
- ExtractorError
)
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index 118b575..ea54f01 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -2,6 +2,7 @@ import base64
import uuid
from .common import InfoExtractor
+from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
@@ -24,11 +25,16 @@ class CrunchyrollBaseIE(InfoExtractor):
_BASE_URL = 'https://www.crunchyroll.com'
_API_BASE = 'https://api.crunchyroll.com'
_NETRC_MACHINE = 'crunchyroll'
+ _SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27'
+ _REFRESH_TOKEN = None
_AUTH_HEADERS = None
+ _AUTH_EXPIRY = None
_API_ENDPOINT = None
- _BASIC_AUTH = None
+ _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
+ 't-kdgp2h8c3jub8fn0fq',
+ 'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
+ )).encode()).decode()
_IS_PREMIUM = None
- _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
_LOCALE_LOOKUP = {
'ar': 'ar-SA',
'de': 'de-DE',
@@ -43,69 +49,78 @@ class CrunchyrollBaseIE(InfoExtractor):
'hi': 'hi-IN',
}
- @property
- def is_logged_in(self):
- return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
+ def _set_auth_info(self, response):
+ CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
+ CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
+ CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
+
+ def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
+ try:
+ return self._download_json(
+ f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
+ headers=headers, data=urlencode_postdata(data), impersonate=True)
+ except ExtractorError as error:
+ if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
+ raise
+ if target := error.cause.response.extensions.get('impersonate'):
+ raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
+ raise ExtractorError(
+ 'Request blocked by Cloudflare. '
+ 'Install the required impersonation dependency if possible, '
+ 'or else navigate to Crunchyroll in your browser, '
+ 'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+ 'and your browser\'s User-Agent (with --user-agent)', expected=True)
def _perform_login(self, username, password):
- if self.is_logged_in:
+ if not CrunchyrollBaseIE._REFRESH_TOKEN:
+ CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
return
- upsell_response = self._download_json(
- f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
- query={
- 'sess_id': 1,
- 'device_id': 'whatvalueshouldbeforweb',
- 'device_type': 'com.crunchyroll.static',
- 'access_token': 'giKq5eY27ny3cqz',
- 'referer': f'{self._BASE_URL}/welcome/login'
- })
- if upsell_response['code'] != 'ok':
- raise ExtractorError('Could not get session id')
- session_id = upsell_response['data']['session_id']
-
- login_response = self._download_json(
- f'{self._API_BASE}/login.1.json', None, 'Logging in',
- data=urlencode_postdata({
- 'account': username,
- 'password': password,
- 'session_id': session_id
- }))
- if login_response['code'] != 'ok':
- raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
- if not self.is_logged_in:
- raise ExtractorError('Login succeeded but did not set etp_rt cookie')
+ try:
+ login_response = self._request_token(
+ headers={'Authorization': self._BASIC_AUTH}, data={
+ 'username': username,
+ 'password': password,
+ 'grant_type': 'password',
+ 'scope': 'offline_access',
+ }, note='Logging in', errnote='Failed to log in')
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+ raise ExtractorError('Invalid username and/or password', expected=True)
+ raise
+
+ CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
+ self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
+ self._set_auth_info(login_response)
def _update_auth(self):
- if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
+ if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
return
- if not CrunchyrollBaseIE._BASIC_AUTH:
- cx_api_param = self._CLIENT_ID[self.is_logged_in]
- self.write_debug(f'Using cxApiParam={cx_api_param}')
- CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
- auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
- if self.is_logged_in:
- grant_type = 'etp_rt_cookie'
+ auth_headers = {'Authorization': self._BASIC_AUTH}
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
+ data = {
+ 'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
+ 'grant_type': 'refresh_token',
+ 'scope': 'offline_access',
+ }
else:
- grant_type = 'client_id'
+ data = {'grant_type': 'client_id'}
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
try:
- auth_response = self._download_json(
- f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
- headers=auth_headers, data=f'grant_type={grant_type}'.encode())
+ auth_response = self._request_token(auth_headers, data)
except ExtractorError as error:
- if isinstance(error.cause, HTTPError) and error.cause.status == 403:
- raise ExtractorError(
- 'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
- 'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
- 'and your browser\'s User-Agent (with --user-agent)', expected=True)
- raise
+ username, password = self._get_login_info()
+ if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
+ raise
+ self.to_screen('Refresh token has expired. Re-logging in')
+ CrunchyrollBaseIE._REFRESH_TOKEN = None
+ self.cache.store(self._NETRC_MACHINE, username, None)
+ self._perform_login(username, password)
+ return
- CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
- CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
- CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+ self._set_auth_info(auth_response)
def _locale_from_language(self, language):
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
@@ -166,9 +181,19 @@ class CrunchyrollBaseIE(InfoExtractor):
display_id = identifier
self._update_auth()
- stream_response = self._download_json(
- f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
- display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
+ headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT}
+ try:
+ stream_response = self._download_json(
+ f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
+ display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers)
+ except ExtractorError as error:
+ if self.get_param('ignore_no_formats_error'):
+ self.report_warning(error.orig_msg)
+ return [], {}
+ elif isinstance(error.cause, HTTPError) and error.cause.status == 420:
+ raise ExtractorError(
+ 'You have reached the rate-limit for active streams; try again later', expected=True)
+ raise
available_formats = {'': ('', '', stream_response['url'])}
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
@@ -197,7 +222,7 @@ class CrunchyrollBaseIE(InfoExtractor):
fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
self._merge_subtitles(dash_subs, target=subtitles)
else:
- continue # XXX: Update this if/when meta mpd formats are working
+ continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = audio_locale
@@ -207,6 +232,15 @@ class CrunchyrollBaseIE(InfoExtractor):
for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
+ # Invalidate stream token to avoid rate-limit
+ error_msg = 'Unable to invalidate stream token; you may experience rate-limiting'
+ if stream_token := stream_response.get('token'):
+ self._request_webpage(Request(
+ f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive',
+ headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False)
+ else:
+ self.report_warning(error_msg)
+
return formats, subtitles
@@ -383,11 +417,12 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
message = f'This {object_type} is for premium members only'
- if self.is_logged_in:
- raise ExtractorError(message, expected=True)
- self.raise_login_required(message)
-
- result['formats'], result['subtitles'] = self._extract_stream(internal_id)
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
+ self.raise_no_formats(message, expected=True, video_id=internal_id)
+ else:
+ self.raise_login_required(message, method='password', metadata_available=True)
+ else:
+ result['formats'], result['subtitles'] = self._extract_stream(internal_id)
result['chapters'] = self._extract_chapters(internal_id)
@@ -573,14 +608,16 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
if not response:
raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)
+ result = self._transform_music_response(response)
+
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
message = f'This {response.get("type") or "media"} is for premium members only'
- if self.is_logged_in:
- raise ExtractorError(message, expected=True)
- self.raise_login_required(message)
-
- result = self._transform_music_response(response)
- result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
+ if CrunchyrollBaseIE._REFRESH_TOKEN:
+ self.raise_no_formats(message, expected=True, video_id=internal_id)
+ else:
+ self.raise_login_required(message, method='password', metadata_available=True)
+ else:
+ result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
return result
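
The crunchyroll auth rewrite replaces cookie-based login with an OAuth-style flow: a long-lived refresh token is cached per username, short-lived access tokens are renewed just before expiry, and a failed refresh (HTTP 400) clears the cache and falls back to a full password login. A toy model of the token bookkeeping in _set_auth_info/_update_auth (the 10-second safety margin mirrors the code above; everything else is illustrative):

import time

class TokenStore:
    refresh_token = None   # long-lived, cached across runs
    access_token = None    # short-lived, renewed on demand
    expiry = 0.0

    def set_auth(self, response):
        self.access_token = response['access_token']
        # renew 10 seconds early, as _set_auth_info does
        self.expiry = time.time() + response.get('expires_in', 300) - 10

    def needs_refresh(self):
        return not self.access_token or time.time() >= self.expiry

store = TokenStore()
store.set_auth({'access_token': 'dummy', 'expires_in': 600})
print(store.needs_refresh())  # False for roughly the next 590 seconds
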
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
index 0075680..e56584e 100644
--- a/yt_dlp/extractor/cspan.py
+++ b/yt_dlp/extractor/cspan.py
@@ -1,10 +1,12 @@
import re
from .common import InfoExtractor
+from .senategov import SenateISVPIE
+from .ustream import UstreamIE
from ..compat import compat_HTMLParseError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
extract_attributes,
find_xpath_attr,
get_element_by_attribute,
@@ -19,8 +21,6 @@ from ..utils import (
str_to_int,
unescapeHTML,
)
-from .senategov import SenateISVPIE
-from .ustream import UstreamIE
class CSpanIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py
index cec178f..1817bd2 100644
--- a/yt_dlp/extractor/ctsnews.py
+++ b/yt_dlp/extractor/ctsnews.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
-from ..utils import unified_timestamp
from .youtube import YoutubeIE
+from ..utils import unified_timestamp
class CtsNewsIE(InfoExtractor):
diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py
index 43401e1..4c25bea 100644
--- a/yt_dlp/extractor/dailymail.py
+++ b/yt_dlp/extractor/dailymail.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- int_or_none,
determine_protocol,
+ int_or_none,
try_get,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py
index 5e14d6a..2e0f6f0 100644
--- a/yt_dlp/extractor/damtomo.py
+++ b/yt_dlp/extractor/damtomo.py
@@ -1,8 +1,8 @@
import re
from .common import InfoExtractor
-from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
from ..compat import compat_str
+from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
class DamtomoBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py
new file mode 100644
index 0000000..50e4136
--- /dev/null
+++ b/yt_dlp/extractor/dangalplay.py
@@ -0,0 +1,197 @@
+import hashlib
+import json
+import re
+import time
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class DangalPlayBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'dangalplay'
+ _OTV_USER_ID = None
+ _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
+ _API_BASE = 'https://ottapi.dangalplay.com'
+ _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
+ _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above
+
+ def _perform_login(self, username, password):
+ if self._OTV_USER_ID:
+ return
+ if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password):
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
+ self._OTV_USER_ID = password
+
+ def _real_initialize(self):
+ if not self._OTV_USER_ID:
+ self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None)
+
+ def _extract_episode_info(self, metadata, episode_slug, series_slug):
+ return {
+ 'display_id': episode_slug,
+ 'episode_number': int_or_none(self._search_regex(
+ r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)),
+ 'season_number': int_or_none(self._search_regex(
+ r'season-(\d+)', series_slug, 'season number', default='1')),
+ 'series': series_slug,
+ **traverse_obj(metadata, {
+ 'id': ('content_id', {str}),
+ 'title': ('display_title', {str}),
+ 'episode': ('title', {str}),
+ 'series': ('show_name', {str}, {lambda x: x or None}),
+ 'series_id': ('catalog_id', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'release_timestamp': ('release_date_uts', {int_or_none}),
+ }),
+ }
+
+ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}):
+ return self._download_json(
+ f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
+ headers={'Accept': 'application/json'}, query={
+ 'auth_token': self._AUTH_TOKEN,
+ 'region': 'IN',
+ **query,
+ })
+
+
+class DangalPlayIE(DangalPlayBaseIE):
+ IE_NAME = 'dangalplay'
+    _VALID_URL = r'https?://(?:www\.)?dangalplay\.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01',
+ 'info_dict': {
+ 'id': '647c61dc1e7171310dcd49b4',
+ 'ext': 'mp4',
+ 'release_timestamp': 1262304000,
+ 'episode_number': 1,
+ 'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
+ 'series': 'kitani-mohabbat-hai-season-2',
+ 'season_number': 2,
+ 'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
+ 'release_date': '20100101',
+ 'duration': 2325,
+ 'season': 'Season 2',
+ 'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01',
+ 'series_id': '645c9ea41e717158ca574966',
+ },
+ }, {
+ 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01',
+ 'info_dict': {
+ 'id': '65d31d9ba73b9c3abd14a7f3',
+ 'ext': 'mp4',
+ 'episode': 'EP 1 | MILKE BHI HUM NA MILE',
+ 'release_timestamp': 1708367411,
+ 'episode_number': 1,
+ 'season': 'Season 1',
+ 'title': 'EP 1 | MILKE BHI HUM NA MILE',
+ 'duration': 156048,
+ 'release_date': '20240219',
+ 'season_number': 1,
+ 'series': 'MILKE BHI HUM NA MILE',
+ 'series_id': '645c9ea41e717158ca574966',
+ 'display_id': 'milke-bhi-hum-na-mile-ep-number-01',
+ },
+ }]
+
+ def _generate_api_data(self, data):
+ catalog_id = data['catalog_id']
+ content_id = data['content_id']
+ timestamp = str(int(time.time()))
+ unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY))
+
+ return json.dumps({
+ 'catalog_id': catalog_id,
+ 'content_id': content_id,
+ 'category': '',
+ 'region': 'IN',
+ 'auth_token': self._AUTH_TOKEN,
+ 'id': self._OTV_USER_ID,
+ 'md5': hashlib.md5(unhashed.encode()).hexdigest(),
+ 'ts': timestamp,
+ }, separators=(',', ':')).encode()
+
+ def _real_extract(self, url):
+ series_slug, episode_slug = self._match_valid_url(url).group('series', 'id')
+ metadata = self._call_api(
+ f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip',
+ episode_slug, query={'item_language': ''})['data']
+
+ try:
+ details = self._download_json(
+ f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug,
+ 'Downloading playback details JSON', headers={
+ 'Accept': 'application/json',
+ 'Content-Type': 'application/json',
+ }, data=self._generate_api_data(metadata))['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and e.cause.status == 422:
+ error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
+ if error_info.get('code') == '1016':
+ self.raise_login_required(
+ f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
+ elif msg := error_info.get('message'):
+ raise ExtractorError(msg)
+ raise
+
+ m3u8_url = traverse_obj(details, (
+ ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any))
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4')
+
+ return {
+ 'formats': formats,
+ 'subtitles': subtitles,
+ **self._extract_episode_info(metadata, episode_slug, series_slug),
+ }
+
+
+class DangalPlaySeasonIE(DangalPlayBaseIE):
+ IE_NAME = 'dangalplay:season'
+    _VALID_URL = r'https?://(?:www\.)?dangalplay\.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1',
+ 'playlist_mincount': 170,
+ 'info_dict': {
+ 'id': 'kitani-mohabbat-hai-season-1',
+ },
+ }, {
+ 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes',
+ 'playlist_count': 30,
+ 'info_dict': {
+ 'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1',
+ },
+ }, {
+ # 1 season only, series page is season page
+ 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': 'milke-bhi-hum-na-mile',
+ },
+ }]
+
+ def _entries(self, subcategories, series_slug):
+ for subcategory in subcategories:
+ data = self._call_api(
+ f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip',
+ series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={
+ 'order_by': 'asc',
+ 'status': 'published',
+ })
+ for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])):
+ episode_slug = ep['friendly_id']
+ yield self.url_result(
+ f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}',
+ DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug))
+
+ def _real_extract(self, url):
+ series_slug, subcategory = self._match_valid_url(url).group('id', 'sub')
+ subcategories = [subcategory] if subcategory else traverse_obj(
+ self._call_api(
+ f'catalogs/shows/items/{series_slug}.gzip', series_slug,
+ 'Downloading season info JSON', query={'item_language': ''}),
+ ('data', 'subcategories', ..., 'friendly_id', {str}))
+
+ return self.playlist_result(
+ self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory))
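
The playback request built in _generate_api_data above is signed: the `md5` field hashes the concatenation of catalog id, content id, user id, timestamp and the hard-coded secret key, in that order. The signing step in isolation (all values below are dummies):

import hashlib
import time

def sign(catalog_id, content_id, user_id, secret_key):
    ts = str(int(time.time()))
    unhashed = catalog_id + content_id + user_id + ts + secret_key
    return {'md5': hashlib.md5(unhashed.encode()).hexdigest(), 'ts': ts}

print(sign('cat01', 'content01', 'f' * 32, 'dummy-secret'))
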
diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py
index 1624d08..1774249 100644
--- a/yt_dlp/extractor/democracynow.py
+++ b/yt_dlp/extractor/democracynow.py
@@ -1,11 +1,11 @@
-import re
import os.path
+import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- url_basename,
remove_start,
+ url_basename,
)
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
index c11cd79..4380c41 100644
--- a/yt_dlp/extractor/digitalconcerthall.py
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
parse_resolution,
diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py
index 1f3d8e3..b2663a6 100644
--- a/yt_dlp/extractor/discoverygo.py
+++ b/yt_dlp/extractor/discoverygo.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
extract_attributes,
- ExtractorError,
int_or_none,
parse_age_limit,
remove_end,
diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py
index 430de32..d8dde0c 100644
--- a/yt_dlp/extractor/disney.py
+++ b/yt_dlp/extractor/disney.py
@@ -2,10 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- int_or_none,
- unified_strdate,
determine_ext,
+ int_or_none,
join_nonempty,
+ unified_strdate,
update_url_query,
)
diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py
index ee8893d..244ffdf 100644
--- a/yt_dlp/extractor/douyutv.py
+++ b/yt_dlp/extractor/douyutv.py
@@ -1,5 +1,5 @@
-import time
import hashlib
+import time
import urllib
import uuid
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 363b4be..ddf2128 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -4,8 +4,8 @@ import uuid
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
int_or_none,
remove_start,
@@ -355,12 +355,10 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
video_id, headers=headers, data=json.dumps({
'deviceInfo': {
'adBlocker': False,
+ 'drmSupported': False,
},
'videoId': video_id,
- 'wisteriaProperties': {
- 'platform': 'desktop',
- 'product': self._PRODUCT,
- },
+ 'wisteriaProperties': {},
}).encode('utf-8'))['data']['attributes']['streaming']
def _real_extract(self, url):
@@ -878,10 +876,31 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
})
-class DiscoveryNetworksDeIE(DPlayBaseIE):
+class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
_TESTS = [{
+ 'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
+ 'info_dict': {
+ 'id': '4756322',
+ 'ext': 'mp4',
+ 'title': 'German Gold',
+ 'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6',
+ 'display_id': 'goldrausch-in-australien/german-gold',
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'season': 'Season 5',
+ 'season_number': 5,
+ 'series': 'Goldrausch in Australien',
+ 'duration': 2648.0,
+ 'upload_date': '20230517',
+ 'timestamp': 1684357500,
+ 'creators': ['DMAX'],
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
+ 'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
'info_dict': {
'id': '78867',
@@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
'season_number': 1,
'thumbnail': r're:https://.+\.jpg',
},
- 'params': {
- 'skip_download': True,
- },
+ 'skip': '404 Not Found',
}, {
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
'only_matching': True,
@@ -920,8 +937,14 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
return self._get_disco_api_info(
- url, '%s/%s' % (programme, alternate_id),
- 'sonic-eu1-prod.disco-api.com', realm, country)
+ url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
class DiscoveryPlusShowBaseIE(DPlayBaseIE):
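Note: DiscoveryNetworksDeIE now inherits from DiscoveryPlusBaseIE and supplies its own _update_disco_api_headers, so every disco-api request carries the realm, client and auth headers. A minimal standalone sketch of the hook (the base class and token value here are stand-ins, not yt-dlp's real ones):

    class DiscoApiBase:
        # stand-in for DPlayBaseIE's default hook
        def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
            headers['x-disco-params'] = f'realm={realm}'

    class GermanSiteIE(DiscoApiBase):
        def _get_auth(self, disco_base, display_id, realm):
            return 'Bearer <token>'  # placeholder; the real value comes from the token endpoint

        def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
            headers.update({
                'x-disco-params': f'realm={realm}',
                'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
                'Authorization': self._get_auth(disco_base, display_id, realm),
            })

    headers = {}
    GermanSiteIE()._update_disco_api_headers(headers, 'https://eu1-prod.disco-api.com', 'id', 'dmaxde')
    assert headers['x-disco-client'] == 'Alps:HyogaPlayer:0.0.0'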
diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py
index e5dab6a..a9247ed 100644
--- a/yt_dlp/extractor/drtuber.py
+++ b/yt_dlp/extractor/drtuber.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- int_or_none,
NO_DEFAULT,
+ int_or_none,
parse_duration,
str_to_int,
)
diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py
index 626e577..adc7705 100644
--- a/yt_dlp/extractor/duboku.py
+++ b/yt_dlp/extractor/duboku.py
@@ -5,9 +5,9 @@ import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ ExtractorError,
clean_html,
extract_attributes,
- ExtractorError,
get_elements_by_class,
int_or_none,
js_to_json,
diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py
index e671433..e6660dc 100644
--- a/yt_dlp/extractor/dvtv.py
+++ b/yt_dlp/extractor/dvtv.py
@@ -2,15 +2,15 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
+ parse_iso8601,
try_get,
unescapeHTML,
- parse_iso8601,
)
diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py
index f7b8520..feab804 100644
--- a/yt_dlp/extractor/dw.py
+++ b/yt_dlp/extractor/dw.py
@@ -1,10 +1,10 @@
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
int_or_none,
unified_strdate,
url_or_none,
)
-from ..compat import compat_urlparse
class DWIE(InfoExtractor):
diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py
deleted file mode 100644
index 53bc253..0000000
--- a/yt_dlp/extractor/einthusan.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- extract_attributes,
- ExtractorError,
- get_elements_by_class,
- urlencode_postdata,
-)
-
-
-class EinthusanIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://einthusan.tv/movie/watch/9097/',
- 'md5': 'ff0f7f2065031b8a2cf13a933731c035',
- 'info_dict': {
- 'id': '9097',
- 'ext': 'mp4',
- 'title': 'Ae Dil Hai Mushkil',
- 'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }, {
- 'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
- 'only_matching': True,
- }, {
- 'url': 'https://einthusan.com/movie/watch/9097/',
- 'only_matching': True,
- }, {
- 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
- 'only_matching': True,
- }]
-
- # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
- def _decrypt(self, encrypted_data, video_id):
- return self._parse_json(compat_b64decode((
- encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
- )).decode('utf-8'), video_id)
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- host = mobj.group('host')
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
-
- player_params = extract_attributes(self._search_regex(
- r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
-
- page_id = self._html_search_regex(
- '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
- video_data = self._download_json(
- 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
- data=urlencode_postdata({
- 'xEvent': 'UIVideoPlayer.PingOutcome',
- 'xJson': json.dumps({
- 'EJOutcomes': player_params['data-ejpingables'],
- 'NativeHLS': False
- }),
- 'arcVersion': 3,
- 'appVersion': 59,
- 'gorilla.csrf.Token': page_id,
- }))['Data']
-
- if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
- raise ExtractorError(
- 'Download rate reached. Please try again later.', expected=True)
-
- ej_links = self._decrypt(video_data['EJLinks'], video_id)
-
- formats = []
-
- m3u8_url = ej_links.get('HLSLink')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
-
- mp4_url = ej_links.get('MP4Link')
- if mp4_url:
- formats.append({
- 'url': mp4_url,
- })
-
- description = get_elements_by_class('synopsis', webpage)[0]
- thumbnail = self._html_search_regex(
- r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
- webpage, 'thumbnail url', fatal=False, group='url')
- if thumbnail is not None:
- thumbnail = compat_urlparse.urljoin(url, thumbnail)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': thumbnail,
- 'description': description,
- }
diff --git a/yt_dlp/extractor/eplus.py b/yt_dlp/extractor/eplus.py
index 88a8d5a..d2ad5b4 100644
--- a/yt_dlp/extractor/eplus.py
+++ b/yt_dlp/extractor/eplus.py
@@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
_TESTS = [{
- 'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
+ 'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
'info_dict': {
- 'id': '354502-0001-002',
- 'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
+ 'id': '335699-0001-006',
+ 'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
'live_status': 'was_live',
- 'release_date': '20211231',
- 'release_timestamp': 1640952000,
+ 'release_date': '20201221',
+ 'release_timestamp': 1608544800,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': [
+ 'This event may not be accessible',
+ 'No video formats found',
+ 'Requested format is not available',
+ ],
+ }, {
+ 'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
+ 'info_dict': {
+ 'id': '348021-0054-001',
+ 'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ~Starlines~【東京/DAY.1】',
+ 'live_status': 'was_live',
+ 'release_date': '20220115',
+ 'release_timestamp': 1642233600,
'description': str,
},
'params': {
@@ -124,6 +142,10 @@ class EplusIbIE(InfoExtractor):
if data_json.get('drm_mode') == 'ON':
self.report_drm(video_id)
+ if data_json.get('is_pass_ticket') == 'YES':
+ raise ExtractorError(
+ 'This URL is for a pass ticket instead of a player page', expected=True)
+
delivery_status = data_json.get('delivery_status')
archive_mode = data_json.get('archive_mode')
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
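Note: the new is_pass_ticket guard fails early with an expected (user-facing) error. The trailing context line also shows where 32400 comes from: event_datetime appears to be Japan time (UTC+9), so 9 * 3600 = 32400 seconds are subtracted to get a UTC timestamp. A small sketch of that conversion (the date format is an assumption for the demo; yt-dlp's real unified_timestamp is more forgiving):

    import calendar
    import datetime

    def unified_timestamp(date_str):
        # simplified stand-in: parse as naive, then read it back as if it were UTC
        dt = datetime.datetime.strptime(date_str, '%Y/%m/%d %H:%M')
        return calendar.timegm(dt.timetuple())

    jst_epoch = unified_timestamp('2022/01/15 18:00')  # wall-clock time in JST
    print(jst_epoch - 32400)                           # shifted back 9 hours -> UTC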
diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py
index 9ecdf5d..19c6933 100644
--- a/yt_dlp/extractor/ertgr.py
+++ b/yt_dlp/extractor/ertgr.py
@@ -4,15 +4,15 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
dict_get,
int_or_none,
merge_dicts,
- parse_qs,
parse_age_limit,
parse_iso8601,
+ parse_qs,
str_or_none,
try_get,
url_or_none,
diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py
index 191a436..0cf889a 100644
--- a/yt_dlp/extractor/europa.py
+++ b/yt_dlp/extractor/europa.py
@@ -8,7 +8,7 @@ from ..utils import (
qualities,
traverse_obj,
unified_strdate,
- xpath_text
+ xpath_text,
)
@@ -94,13 +94,14 @@ class EuropaIE(InfoExtractor):
class EuroParlWebstreamIE(InfoExtractor):
_VALID_URL = r'''(?x)
- https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
- (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+ https?://multimedia\.europarl\.europa\.eu/
+ (?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
'''
_TESTS = [{
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
'info_dict': {
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
+ 'display_id': '20220914-0900-PLENARY',
'ext': 'mp4',
'title': 'Plenary session',
'release_timestamp': 1663139069,
@@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
'info_dict': {
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+ 'display_id': '20230301-1130-COMMITTEE-CULT',
'ext': 'mp4',
'release_date': '20230301',
'title': 'Committee on Culture and Education',
@@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
'live_status': 'is_live',
},
'skip': 'Not live anymore'
+ }, {
+ 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
+ 'info_dict': {
+ 'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
+ 'display_id': '20240320-1345-SPECIAL-PRESSER',
+ 'ext': 'mp4',
+ 'release_date': '20240320',
+ 'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
+ 'release_timestamp': 1710939767,
+ }
+ }, {
+ 'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -166,6 +181,7 @@ class EuroParlWebstreamIE(InfoExtractor):
return {
'id': json_info['id'],
+ 'display_id': display_id,
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
'formats': formats,
'subtitles': subtitles,
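Note: the relaxed _VALID_URL makes both the language segment and the slug prefix optional, which the new tests exercise. A quick check against the test URLs:

    import re

    VALID_URL = r'''(?x)
        https?://multimedia\.europarl\.europa\.eu/
        (?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
    '''
    for url in (
        'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
        'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
        'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
    ):
        print(re.match(VALID_URL, url).group('id'))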
diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
index 65a1dc7..66fa42f 100644
--- a/yt_dlp/extractor/euscreen.py
+++ b/yt_dlp/extractor/euscreen.py
@@ -1,8 +1,7 @@
from .common import InfoExtractor
-
from ..utils import (
- parse_duration,
js_to_json,
+ parse_duration,
)
diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py
index d8b068e..4a13ab0 100644
--- a/yt_dlp/extractor/eyedotv.py
+++ b/yt_dlp/extractor/eyedotv.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..utils import (
- xpath_text,
- parse_duration,
ExtractorError,
+ parse_duration,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index 834b1df..b76407a 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats):
- dash_manifest = video.get('dash_manifest')
+ dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
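Note: the Facebook change lets the DASH manifest come from either the dash_manifest or the playlist key, keeping only string values. How the multi-path fallback behaves, using yt-dlp's real traverse_obj (the payloads below are made up):

    from yt_dlp.utils import traverse_obj

    old_payload = {'dash_manifest': '<MPD/>'}
    new_payload = {'dash_manifest': None, 'playlist': '<MPD/>'}
    for video in (old_payload, new_payload):
        # the first path that yields a non-None str wins
        print(traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str))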
diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py
index cddf254..1e80f9a 100644
--- a/yt_dlp/extractor/fancode.py
+++ b/yt_dlp/extractor/fancode.py
@@ -1,12 +1,6 @@
from .common import InfoExtractor
-
from ..compat import compat_str
-from ..utils import (
- parse_iso8601,
- ExtractorError,
- try_get,
- mimetype2ext
-)
+from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get
class FancodeVodIE(InfoExtractor):
diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py
index bca62ad..796bac3 100644
--- a/yt_dlp/extractor/faz.py
+++ b/yt_dlp/extractor/faz.py
@@ -3,9 +3,9 @@ import re
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..utils import (
+ int_or_none,
xpath_element,
xpath_text,
- int_or_none,
)
diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py
index 8175b6b..b2dbb92 100644
--- a/yt_dlp/extractor/fczenit.py
+++ b/yt_dlp/extractor/fczenit.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
)
diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py
index f604cbd..ae837f6 100644
--- a/yt_dlp/extractor/fifa.py
+++ b/yt_dlp/extractor/fifa.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py
index 0cd18f4..69ca87c 100644
--- a/yt_dlp/extractor/filmon.py
+++ b/yt_dlp/extractor/filmon.py
@@ -2,10 +2,10 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
+ int_or_none,
qualities,
strip_or_none,
- int_or_none,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py
index f9d22fd..c10d290 100644
--- a/yt_dlp/extractor/gab.py
+++ b/yt_dlp/extractor/gab.py
@@ -7,7 +7,7 @@ from ..utils import (
parse_codecs,
parse_duration,
str_to_int,
- unified_timestamp
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py
index 1d3c0b1..b284e1e 100644
--- a/yt_dlp/extractor/gamejolt.py
+++ b/yt_dlp/extractor/gamejolt.py
@@ -10,7 +10,7 @@ from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
- try_get
+ try_get,
)
diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py
index bc56b03..6403be8 100644
--- a/yt_dlp/extractor/gaskrank.py
+++ b/yt_dlp/extractor/gaskrank.py
@@ -1,4 +1,5 @@
import re
+
from .common import InfoExtractor
from ..utils import (
float_or_none,
diff --git a/yt_dlp/extractor/gbnews.py b/yt_dlp/extractor/gbnews.py
new file mode 100644
index 0000000..bb1554e
--- /dev/null
+++ b/yt_dlp/extractor/gbnews.py
@@ -0,0 +1,107 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ get_elements_html_by_class,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class GBNewsIE(InfoExtractor):
+ IE_DESC = 'GB News clips, features and live streams'
+ _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
+
+ _PLATFORM = 'safari'
+ _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
+ _TESTS = [{
+ 'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
+ 'info_dict': {
+ 'id': '52264136',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
+ 'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
+ 'description': 'The post was criticised by former employers of the broadcaster',
+ 'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
+ },
+ }, {
+ 'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
+ 'info_dict': {
+ 'id': '52328390',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
+ 'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal',
+ 'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family',
+ 'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'',
+ }
+ }, {
+ 'url': 'https://www.gbnews.uk/watchlive',
+ 'info_dict': {
+ 'id': '1069',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)',
+ 'display_id': 'watchlive',
+ 'live_status': 'is_live',
+ 'title': r're:^GB News Live',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ @functools.lru_cache
+ def _get_ss_endpoint(self, data_id, data_env):
+ if not data_id:
+ data_id = 'GB003'
+ if not data_env:
+ data_env = 'production'
+
+ json_data = self._download_json(
+ self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={
+ 'id': data_id,
+ 'env': data_env,
+ })
+ meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none}))
+ if not meta_url:
+ raise ExtractorError('No API host found')
+
+ return meta_url
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url).rpartition('/')[2]
+ webpage = self._download_webpage(url, display_id)
+
+ video_data = None
+ elements = get_elements_html_by_class('simplestream', webpage)
+ for html_tag in elements:
+ attributes = extract_attributes(html_tag)
+ if 'sidebar' not in (attributes.get('class') or ''):
+ video_data = attributes
+ if not video_data:
+ raise ExtractorError('Could not find video element', expected=True)
+
+ endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env'))
+
+ uvid = video_data['data-uvid']
+ video_type = video_data.get('data-type')
+ if not video_type or video_type == 'vod':
+ video_type = 'show'
+ stream_data = self._download_json(
+ f'{endpoint_url}/api/{video_type}/stream/{uvid}',
+ uvid, 'Downloading stream JSON', query={
+ 'key': video_data.get('data-key'),
+ 'platform': self._PLATFORM,
+ })
+ if traverse_obj(stream_data, 'drm'):
+ self.report_drm(uvid)
+
+ return {
+ 'id': uvid,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage, default=None),
+ 'description': self._og_search_description(webpage, default=None),
+ 'formats': self._extract_m3u8_formats(traverse_obj(stream_data, (
+ 'response', 'stream', {url_or_none})), uvid, 'mp4'),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'is_live': video_type == 'live',
+ }
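Note: @functools.lru_cache on _get_ss_endpoint means the Simplestream SSMP lookup runs at most once per (instance, data_id, data_env); the cache key includes self, which also keeps the instance alive for the life of the cache. Minimal demonstration of the caching behaviour:

    import functools

    class Endpoint:
        calls = 0

        @functools.lru_cache
        def resolve(self, data_id, data_env):
            Endpoint.calls += 1  # stand-in for the network round-trip
            return f'https://api.example/{data_id}/{data_env}'

    e = Endpoint()
    e.resolve('GB003', 'production')
    e.resolve('GB003', 'production')
    print(Endpoint.calls)  # 1 -> the second call was served from the cache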
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 2cfed0f..2818c71 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -4,7 +4,7 @@ import types
import urllib.parse
import xml.etree.ElementTree
-from .common import InfoExtractor # isort: split
+from .common import InfoExtractor
from .commonprotocols import RtmpIE
from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring
diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py
index 7795dc5..b9dc7c6 100644
--- a/yt_dlp/extractor/gettr.py
+++ b/yt_dlp/extractor/gettr.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- bool_or_none,
ExtractorError,
+ bool_or_none,
dict_get,
float_or_none,
int_or_none,
diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py
index c5bc86b..7baf8de 100644
--- a/yt_dlp/extractor/gigya.py
+++ b/yt_dlp/extractor/gigya.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
urlencode_postdata,
diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py
index 22aac0d..515f3c5 100644
--- a/yt_dlp/extractor/glomex.py
+++ b/yt_dlp/extractor/glomex.py
@@ -3,9 +3,9 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
extract_attributes,
- ExtractorError,
int_or_none,
parse_qs,
smuggle_url,
diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py
index b075a02..fba98d7 100644
--- a/yt_dlp/extractor/go.py
+++ b/yt_dlp/extractor/go.py
@@ -3,16 +3,16 @@ import re
from .adobepass import AdobePassIE
from ..compat import compat_str
from ..utils import (
- int_or_none,
+ ExtractorError,
determine_ext,
+ int_or_none,
parse_age_limit,
- remove_start,
remove_end,
+ remove_start,
+ traverse_obj,
try_get,
- urlencode_postdata,
- ExtractorError,
unified_timestamp,
- traverse_obj,
+ urlencode_postdata,
)
diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py
new file mode 100644
index 0000000..276a6c7
--- /dev/null
+++ b/yt_dlp/extractor/godresource.py
@@ -0,0 +1,79 @@
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ str_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class GodResourceIE(InfoExtractor):
+ _VALID_URL = r'https?://new\.godresource\.com/video/(?P<id>\w+)'
+ _TESTS = [{
+ # hls stream
+ 'url': 'https://new.godresource.com/video/A01mTKjyf6w',
+ 'info_dict': {
+ 'id': 'A01mTKjyf6w',
+ 'ext': 'mp4',
+ 'view_count': int,
+ 'timestamp': 1710978666,
+ 'channel_id': '5',
+ 'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg',
+ 'channel': 'Stedfast Baptist Church',
+ 'upload_date': '20240320',
+ 'title': 'GodResource video #A01mTKjyf6w',
+ }
+ }, {
+ # mp4 link
+ 'url': 'https://new.godresource.com/video/01DXmBbQv_X',
+ 'md5': '0e8f72aa89a106b9d5c011ba6f8717b7',
+ 'info_dict': {
+ 'id': '01DXmBbQv_X',
+ 'ext': 'mp4',
+ 'channel_id': '12',
+ 'view_count': int,
+ 'timestamp': 1687996800,
+ 'thumbnail': 'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg',
+ 'channel': 'Documentaries',
+ 'title': 'The Sodomite Deception',
+ 'upload_date': '20230629',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ api_data = self._download_json(
+ f'https://api.godresource.com/api/Streams/{display_id}', display_id)
+
+ video_url = api_data['streamUrl']
+ is_live = api_data.get('isLive') or False
+ if (ext := determine_ext(video_url)) == 'm3u8':
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ video_url, display_id, live=is_live)
+ elif ext == 'mp4':
+ formats, subtitles = [{
+ 'url': video_url,
+ 'ext': ext
+ }], {}
+ else:
+ raise ExtractorError(f'Unexpected video format {ext}')
+
+ return {
+ 'id': display_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': '',
+ 'is_live': is_live,
+ **traverse_obj(api_data, {
+ 'title': ('title', {str}),
+ 'thumbnail': ('thumbnail', {url_or_none}),
+ 'view_count': ('views', {int}),
+ 'channel': ('channelName', {str}),
+ 'channel_id': ('channelId', {str_or_none}),
+ 'timestamp': ('streamDateCreated', {unified_timestamp}),
+ 'modified_timestamp': ('streamDataModified', {unified_timestamp})
+ })
+ }
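Note: the new extractor dispatches on the stream URL's extension with a walrus binding. determine_ext is yt-dlp's real helper; the URLs below are made up:

    from yt_dlp.utils import determine_ext

    for video_url in ('https://cdn.example/stream.m3u8', 'https://cdn.example/file.mp4'):
        if (ext := determine_ext(video_url)) == 'm3u8':
            print(ext, '-> HLS: _extract_m3u8_formats_and_subtitles')
        elif ext == 'mp4':
            print(ext, '-> progressive: single format dict')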
diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py
index c6eca0c..fac0884 100644
--- a/yt_dlp/extractor/gofile.py
+++ b/yt_dlp/extractor/gofile.py
@@ -1,10 +1,7 @@
import hashlib
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- try_get
-)
+from ..utils import ExtractorError, try_get
class GofileIE(InfoExtractor):
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index 06658dd..c19192c 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -1,9 +1,11 @@
import re
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..compat import compat_parse_qs
from ..utils import (
ExtractorError,
+ bug_reports_message,
determine_ext,
extract_attributes,
get_element_by_class,
@@ -39,6 +41,17 @@ class GoogleDriveIE(InfoExtractor):
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
}
}, {
+    # has itag 50 which is not in YoutubeIE._formats (royalty-free music from 1922)
+ 'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ 'md5': '322db8d63dd19788c04050a4bba67073',
+ 'info_dict': {
+ 'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ 'ext': 'mp3',
+ 'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
+ 'duration': 184,
+ 'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+ },
+ }, {
# video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
@@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
'only_matching': True,
}]
_FORMATS_EXT = {
- '5': 'flv',
- '6': 'flv',
- '13': '3gp',
- '17': '3gp',
- '18': 'mp4',
- '22': 'mp4',
- '34': 'flv',
- '35': 'flv',
- '36': '3gp',
- '37': 'mp4',
- '38': 'mp4',
- '43': 'webm',
- '44': 'webm',
- '45': 'webm',
- '46': 'webm',
- '59': 'mp4',
+ **{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
+ '50': 'm4a',
}
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
_CAPTIONS_ENTRY_TAG = {
@@ -194,10 +193,13 @@ class GoogleDriveIE(InfoExtractor):
if len(fmt_stream_split) < 2:
continue
format_id, format_url = fmt_stream_split[:2]
+ ext = self._FORMATS_EXT.get(format_id)
+ if not ext:
+ self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
f = {
'url': lowercase_escape(format_url),
'format_id': format_id,
- 'ext': self._FORMATS_EXT[format_id],
+ 'ext': ext,
}
resolution = resolutions.get(format_id)
if resolution:
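Note: the hand-maintained itag-to-extension table is replaced by one derived from YoutubeIE._formats, patched with itag 50 (absent there), and unknown itags now warn instead of raising KeyError. Sketch of the rebuild (assumes YoutubeIE._formats is still importable, as it was at the time of this change):

    from yt_dlp.extractor.youtube import YoutubeIE

    FORMATS_EXT = {
        **{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
        '50': 'm4a',
    }
    print(FORMATS_EXT['18'])       # 'mp4', inherited from the YouTube table
    print(FORMATS_EXT.get('999'))  # None -> caller emits a warning, ext stays unset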
diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py
index 112293b..9c1a6cb 100644
--- a/yt_dlp/extractor/gotostage.py
+++ b/yt_dlp/extractor/gotostage.py
@@ -1,11 +1,8 @@
+import json
+
from .common import InfoExtractor
from ..compat import compat_str
-from ..utils import (
- try_get,
- url_or_none
-)
-
-import json
+from ..utils import try_get, url_or_none
class GoToStageIE(InfoExtractor):
diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py
index 530bdb7..2551cff 100644
--- a/yt_dlp/extractor/hbo.py
+++ b/yt_dlp/extractor/hbo.py
@@ -2,11 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
- xpath_text,
- xpath_element,
int_or_none,
parse_duration,
urljoin,
+ xpath_element,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py
index d1a400d..eb0a779 100644
--- a/yt_dlp/extractor/hearthisat.py
+++ b/yt_dlp/extractor/hearthisat.py
@@ -1,19 +1,20 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
KNOWN_EXTENSIONS,
+ determine_ext,
str_to_int,
)
class HearThisAtIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
+ _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/?#]+)/(?P<title>[\w.-]+)'
_PLAYLIST_URL = 'https://hearthis.at/playlist.php'
_TESTS = [{
'url': 'https://hearthis.at/moofi/dr-kreep',
'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
'info_dict': {
'id': '150939',
+ 'display_id': 'moofi - dr-kreep',
'ext': 'wav',
'title': 'Moofi - Dr. Kreep',
'thumbnail': r're:^https?://.*\.jpg$',
@@ -21,15 +22,16 @@ class HearThisAtIE(InfoExtractor):
'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
'upload_date': '20150118',
'view_count': int,
- 'duration': 71,
- 'genre': 'Experimental',
- }
+ 'duration': 70,
+ 'genres': ['Experimental'],
+ },
}, {
# 'download' link redirects to the original webpage
'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
'md5': '5980ceb7c461605d30f1f039df160c6e',
'info_dict': {
'id': '811296',
+ 'display_id': 'twitchsf - dj-jim-hopkins-totally-bitchin-80s-dance-mix',
'ext': 'mp3',
'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',
'description': 'md5:ef26815ca8f483272a87b137ff175be2',
@@ -38,7 +40,39 @@ class HearThisAtIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'view_count': int,
'duration': 4360,
- 'genre': 'Dance',
+ 'genres': ['Dance'],
+ },
+ }, {
+ 'url': 'https://hearthis.at/tindalos/0001-tindalos-gnrique/eQd/',
+ 'md5': 'cd08e51911f147f6da2d9678905b0bd9',
+ 'info_dict': {
+ 'id': '2685222',
+ 'ext': 'mp3',
+ 'duration': 86,
+ 'view_count': int,
+ 'timestamp': 1545471670,
+ 'display_id': 'tindalos - 0001-tindalos-gnrique',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'genres': ['Other'],
+ 'title': 'Tindalos - Tindalos - générique n°1',
+ 'description': '',
+ 'upload_date': '20181222',
+ },
+ }, {
+ 'url': 'https://hearthis.at/sithi2/biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011/',
+ 'md5': 'b45ac60f0c8111eef6ddc10ec232e312',
+ 'info_dict': {
+ 'id': '7145959',
+ 'ext': 'mp3',
+ 'description': 'md5:d7ae36a453d78903f6b7ed6eb2fce1f2',
+ 'duration': 8986,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'md5:62669ce5b1b67f45c6f846033f37d3b9',
+ 'timestamp': 1588699409,
+ 'display_id': 'sithi2 - biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011',
+ 'view_count': int,
+ 'upload_date': '20200505',
+ 'genres': ['Other'],
},
}]
diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py
index e026996..099c2a1 100644
--- a/yt_dlp/extractor/hketv.py
+++ b/yt_dlp/extractor/hketv.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
merge_dicts,
parse_count,
diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py
index 57b76e4..41d50d0 100644
--- a/yt_dlp/extractor/hrti.py
+++ b/yt_dlp/extractor/hrti.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
parse_age_limit,
try_get,
diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py
index c4965f9..5379b54 100644
--- a/yt_dlp/extractor/huya.py
+++ b/yt_dlp/extractor/huya.py
@@ -2,8 +2,8 @@ import hashlib
import random
import re
-from ..compat import compat_urlparse, compat_b64decode
-
+from .common import InfoExtractor
+from ..compat import compat_b64decode, compat_urlparse
from ..utils import (
ExtractorError,
int_or_none,
@@ -13,8 +13,6 @@ from ..utils import (
update_url_query,
)
-from .common import InfoExtractor
-
class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
diff --git a/yt_dlp/extractor/hytale.py b/yt_dlp/extractor/hytale.py
index 0f4dcc3..e8cd21a 100644
--- a/yt_dlp/extractor/hytale.py
+++ b/yt_dlp/extractor/hytale.py
@@ -1,7 +1,8 @@
import re
+from .cloudflarestream import CloudflareStreamIE
from .common import InfoExtractor
-from ..utils import traverse_obj
+from ..utils.traversal import traverse_obj
class HytaleIE(InfoExtractor):
@@ -49,7 +50,7 @@ class HytaleIE(InfoExtractor):
entries = [
self.url_result(
f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
- title=self._titles.get(video_hash), url_transparent=True)
+ CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
for video_hash in re.findall(
r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
webpage)
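Note: passing CloudflareStreamIE to url_result pins which extractor handles the generated URL instead of re-matching it against every extractor. A simplified stand-in showing the shape of the result (the real InfoExtractor.url_result does more):

    from yt_dlp.extractor.cloudflarestream import CloudflareStreamIE

    def url_result(url, ie=None, url_transparent=False, **kwargs):
        return {
            '_type': 'url_transparent' if url_transparent else 'url',
            'url': url,
            'ie_key': ie.ie_key() if ie else None,
            **kwargs,
        }

    entry = url_result(
        'https://cloudflarestream.com/' + 'f' * 32 + '/manifest/video.mpd',
        CloudflareStreamIE, title='Intro', url_transparent=True)
    print(entry['ie_key'])  # 'CloudflareStream'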
diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py
index 9d55ddc..c28d09f 100644
--- a/yt_dlp/extractor/ichinanalive.py
+++ b/yt_dlp/extractor/ichinanalive.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
-from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
from ..compat import compat_str
+from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate
class IchinanaLiveIE(InfoExtractor):
diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py
index 192bcfe..2bb4850 100644
--- a/yt_dlp/extractor/infoq.py
+++ b/yt_dlp/extractor/infoq.py
@@ -1,3 +1,4 @@
+from .bokecc import BokeCCBaseIE
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
@@ -6,10 +7,9 @@ from ..compat import (
from ..utils import (
ExtractorError,
determine_ext,
- update_url_query,
traverse_obj,
+ update_url_query,
)
-from .bokecc import BokeCCBaseIE
class InfoQIE(BokeCCBaseIE):
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index f7f2150..46f9cd6 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -255,7 +255,7 @@ class InstagramIOSIE(InfoExtractor):
class InstagramIE(InstagramBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
@@ -379,6 +379,9 @@ class InstagramIE(InstagramBaseIE):
}, {
'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/reels/Cop84x6u7CP/',
+ 'only_matching': True,
}]
@classmethod
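Note: the tightened Instagram pattern accepts /reel/ and /reels/ post URLs but rejects /reels/audio/ pages via a negative lookahead. Quick check (the audio URL is made up):

    import re

    VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
    for url in (
        'https://www.instagram.com/reel/CWqAgUZgCku/',
        'https://www.instagram.com/reels/Cop84x6u7CP/',
        'https://www.instagram.com/reels/audio/2822762296376079/',
    ):
        print(url, '->', bool(re.match(VALID_URL, url)))  # True, True, False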
diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py
index f7aa579..d5a3d80 100644
--- a/yt_dlp/extractor/iprima.py
+++ b/yt_dlp/extractor/iprima.py
@@ -3,12 +3,12 @@ import time
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
js_to_json,
- urlencode_postdata,
- ExtractorError,
parse_qs,
- traverse_obj
+ traverse_obj,
+ urlencode_postdata,
)
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index 3368ab1..85ed549 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -4,20 +4,16 @@ import re
import time
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_unquote
-)
from .openload import PhantomJSwrapper
+from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode
from ..utils import (
+ ExtractorError,
clean_html,
decode_packed_codes,
- ExtractorError,
float_or_none,
format_field,
- get_element_by_id,
get_element_by_attribute,
+ get_element_by_id,
int_or_none,
js_to_json,
ohdave_rsa_encrypt,
diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py
index 713fd4e..5d6fbaa 100644
--- a/yt_dlp/extractor/itprotv.py
+++ b/yt_dlp/extractor/itprotv.py
@@ -1,12 +1,11 @@
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py
index 9ac7be3..55c4165 100644
--- a/yt_dlp/extractor/itv.py
+++ b/yt_dlp/extractor/itv.py
@@ -1,23 +1,22 @@
import json
-from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
-
+from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ JSON_LD_RE,
+ ExtractorError,
base_url,
clean_html,
determine_ext,
extract_attributes,
- ExtractorError,
get_element_by_class,
- JSON_LD_RE,
merge_dicts,
parse_duration,
smuggle_url,
try_get,
- url_or_none,
url_basename,
+ url_or_none,
urljoin,
)
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index e23fdfd..a11f3f1 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,9 +1,9 @@
import functools
-import urllib.parse
-import urllib.error
import hashlib
import json
import time
+import urllib.error
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py
deleted file mode 100644
index 71fed49..0000000
--- a/yt_dlp/extractor/jable.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- InAdvancePagedList,
- int_or_none,
- orderedSet,
- unified_strdate,
-)
-
-
-class JableIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
- _TESTS = [{
- 'url': 'https://jable.tv/videos/pppd-812/',
- 'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
- 'info_dict': {
- 'id': 'pppd-812',
- 'ext': 'mp4',
- 'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
- 'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- 'like_count': int,
- 'view_count': int,
- },
- }, {
- 'url': 'https://jable.tv/videos/apak-220/',
- 'md5': '71f9239d69ced58ab74a816908847cc1',
- 'info_dict': {
- 'id': 'apak-220',
- 'ext': 'mp4',
- 'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
- 'description': '',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- 'like_count': int,
- 'view_count': int,
- 'upload_date': '20220319',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- formats = self._extract_m3u8_formats(
- self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage, default=''),
- 'thumbnail': self._og_search_thumbnail(webpage, default=None),
- 'formats': formats,
- 'age_limit': 18,
- 'upload_date': unified_strdate(self._search_regex(
- r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
- 'view_count': int_or_none(self._search_regex(
- r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
- webpage, 'view_count', default='').replace(' ', '')),
- 'like_count': int_or_none(self._search_regex(
- r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)),
- }
-
-
-class JablePlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
- _TESTS = [{
- 'url': 'https://jable.tv/models/kaede-karen/',
- 'info_dict': {
- 'id': 'kaede-karen',
- 'title': '楓カレン',
- },
- 'playlist_count': 34,
- }, {
- 'url': 'https://jable.tv/categories/roleplay/',
- 'only_matching': True,
- }, {
- 'url': 'https://jable.tv/tags/girl/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
-
- def page_func(page_num):
- return [
- self.url_result(player_url, JableIE)
- for player_url in orderedSet(re.findall(
- r'href="(https://jable.tv/videos/[\w-]+/?)"',
- self._download_webpage(url, playlist_id, query={
- 'mode': 'async',
- 'from': page_num + 1,
- 'function': 'get_block',
- 'block_id': 'list_videos_common_videos_list',
- }, note=f'Downloading page {page_num + 1}')))]
-
- return self.playlist_result(
- InAdvancePagedList(page_func, int_or_none(self._search_regex(
- r'from:(\d+)">[^<]+\s*&raquo;', webpage, 'last page number', default=1)), 24),
- playlist_id, self._search_regex(
- r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py
index a2bbba3..8557a81 100644
--- a/yt_dlp/extractor/jamendo.py
+++ b/yt_dlp/extractor/jamendo.py
@@ -1,8 +1,8 @@
import hashlib
import random
-from ..compat import compat_str
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
int_or_none,
diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py
index 6c65056..19d2b92 100644
--- a/yt_dlp/extractor/japandiet.py
+++ b/yt_dlp/extractor/japandiet.py
@@ -1,5 +1,6 @@
import re
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
@@ -9,9 +10,8 @@ from ..utils import (
smuggle_url,
traverse_obj,
try_call,
- unsmuggle_url
+ unsmuggle_url,
)
-from .common import InfoExtractor
def _parse_japanese_date(text):
diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py
new file mode 100644
index 0000000..e7186d7
--- /dev/null
+++ b/yt_dlp/extractor/jiocinema.py
@@ -0,0 +1,403 @@
+import base64
+import itertools
+import json
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ jwt_decode_hs256,
+ parse_age_limit,
+ try_call,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class JioCinemaBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'jiocinema'
+ _GEO_BYPASS = False
+ _ACCESS_TOKEN = None
+ _REFRESH_TOKEN = None
+ _GUEST_TOKEN = None
+ _USER_ID = None
+ _DEVICE_ID = None
+ _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
+ _APP_NAME = {'appName': 'RJIL_JioCinema'}
+ _APP_VERSION = {'appVersion': '5.0.0'}
+ _API_SIGNATURES = 'o668nxgzwff'
+ _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
+ _ACCESS_HINT = 'the `accessToken` from your browser local storage'
+ _LOGIN_HINT = (
+ 'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
+ f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
+ 'If you have previously logged in with yt-dlp and your session '
+ 'has been cached, you can use "-u device -p <DEVICE_ID>"')
+
+ def _cache_token(self, token_type):
+ assert token_type in ('access', 'refresh', 'all')
+ if token_type in ('access', 'all'):
+ self.cache.store(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
+ if token_type in ('refresh', 'all'):
+ self.cache.store(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)
+
+ def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
+ return self._download_json(
+ url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ **self._API_HEADERS,
+ **headers,
+ }, expected_status=(400, 403, 474))
+
+ def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
+ return self._call_api(
+ f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
+ None, note=note, headers=headers, data=data)
+
+ def _refresh_token(self):
+ if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
+ raise ExtractorError('User token has expired', expected=True)
+ response = self._call_auth_api(
+ 'token', 'refreshtoken', 'Refreshing token',
+ headers={'accesstoken': self._ACCESS_TOKEN}, data={
+ **self._APP_NAME,
+ 'deviceId': self._DEVICE_ID,
+ 'refreshToken': self._REFRESH_TOKEN,
+ **self._APP_VERSION,
+ })
+ refresh_token = response.get('refreshTokenId')
+ if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
+ JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
+ self._cache_token('refresh')
+ JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
+ self._cache_token('access')
+
+ def _fetch_guest_token(self):
+ JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
+ guest_token = self._call_auth_api(
+ 'token', 'guest', 'Downloading guest token', data={
+ **self._APP_NAME,
+ 'deviceType': 'phone',
+ 'os': 'ios',
+ 'deviceId': self._DEVICE_ID,
+ 'freshLaunch': False,
+ 'adId': self._DEVICE_ID,
+ **self._APP_VERSION,
+ })
+ self._GUEST_TOKEN = guest_token['authToken']
+ self._USER_ID = guest_token['userId']
+
+ def _call_login_api(self, endpoint, guest_token, data, note):
+ return self._call_auth_api(
+ 'user', f'loginotp/{endpoint}', note, headers={
+ **self.geo_verification_headers(),
+ 'accesstoken': self._GUEST_TOKEN,
+ **self._APP_NAME,
+ **traverse_obj(guest_token, 'data', {
+ 'deviceType': ('deviceType', {str}),
+ 'os': ('os', {str}),
+ })}, data=data)
+
+ def _is_token_expired(self, token):
+ return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)
+
+ def _perform_login(self, username, password):
+ if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
+ return
+
+ UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
+
+ if username.lower() == 'token':
+ if try_call(lambda: jwt_decode_hs256(password)):
+ JioCinemaBaseIE._ACCESS_TOKEN = password
+ refresh_hint = 'the `refreshToken` UUID from your browser local storage'
+ refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
+ if not refresh_token:
+ self.to_screen(
+ 'To extend the life of your login session, in addition to your access token, '
+ 'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
+ f'where REFRESH_TOKEN is {refresh_hint}')
+ elif re.fullmatch(UUID_RE, refresh_token):
+ JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
+ else:
+ self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
+ else:
+ raise ExtractorError(
+ f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)
+
+ elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
+ JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
+ JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
+ if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
+ raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)
+
+ elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password):
+ self._fetch_guest_token()
+ guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
+ initial_data = {
+ 'number': base64.b64encode(password.encode()).decode(),
+ **self._APP_VERSION,
+ }
+ response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
+ if not traverse_obj(response, ('OTPInfo', {dict})):
+ raise ExtractorError('There was a problem with the phone number login attempt')
+
+ is_iphone = guest_token.get('os') == 'ios'
+ response = self._call_login_api('verify', guest_token, {
+ 'deviceInfo': {
+ 'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
+ 'info': {
+ 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
+ 'androidId': self._DEVICE_ID,
+ 'type': 'iOS' if is_iphone else 'Android'
+ }
+ },
+ **initial_data,
+ 'otp': self._get_tfa_info('the one-time password sent to your phone')
+ }, 'Submitting OTP')
+ if traverse_obj(response, 'code') == 1043:
+ raise ExtractorError('Wrong OTP', expected=True)
+ JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
+ JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
+
+ else:
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
+
+ user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
+ JioCinemaBaseIE._USER_ID = user_token['userId']
+ JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
+ if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device':
+ self._cache_token('all')
+ if self.get_param('cachedir') is not False:
+ self.to_screen(
+ f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
+ elif not JioCinemaBaseIE._REFRESH_TOKEN:
+ JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
+ if JioCinemaBaseIE._REFRESH_TOKEN:
+ self._cache_token('access')
+ self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
+ if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
+ self._refresh_token()
+
+
+class JioCinemaIE(JioCinemaBaseIE):
+ IE_NAME = 'jiocinema'
+ _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
+ 'info_dict': {
+ 'id': '3759931',
+ 'ext': 'mp4',
+ 'title': 'Pradeep to stop the wedding?',
+ 'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
+ 'episode': 'Pradeep to stop the wedding?',
+ 'episode_number': 89,
+ 'season': 'Agnisakshi…Ek Samjhauta-S1',
+ 'season_number': 1,
+ 'series': 'Agnisakshi Ek Samjhauta',
+ 'duration': 1238.0,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'age_limit': 13,
+ 'season_id': '3698031',
+ 'upload_date': '20230606',
+ 'timestamp': 1686009600,
+ 'release_date': '20230607',
+ 'genres': ['Drama'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
+ 'info_dict': {
+ 'id': '3754021',
+ 'ext': 'mp4',
+ 'title': 'Bhediya',
+ 'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
+ 'episode': 'Bhediya',
+ 'duration': 8500.0,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'age_limit': 13,
+ 'upload_date': '20230525',
+ 'timestamp': 1685026200,
+ 'release_date': '20230524',
+ 'genres': ['Comedy'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _extract_formats_and_subtitles(self, playback, video_id):
+ m3u8_url = traverse_obj(playback, (
+ 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
+ if not m3u8_url: # DRM-only content only serves dash urls
+ self.report_drm(video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
+ self._remove_duplicate_formats(formats)
+
+ return {
+ # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
+ 'formats': traverse_obj(formats, (
+ lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
+ self._fetch_guest_token()
+ elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
+ self._refresh_token()
+
+ playback = self._call_api(
+ f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
+ 'Downloading playback JSON', headers={
+ **self.geo_verification_headers(),
+ 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
+ **self._APP_NAME,
+ 'deviceid': self._DEVICE_ID,
+ 'uniqueid': self._USER_ID,
+ 'x-apisignatures': self._API_SIGNATURES,
+ 'x-platform': 'androidweb',
+ 'x-platform-token': 'web',
+ }, data={
+ '4k': False,
+ 'ageGroup': '18+',
+ 'appVersion': '3.4.0',
+ 'bitrateProfile': 'xhdpi',
+ 'capability': {
+ 'drmCapability': {
+ 'aesSupport': 'yes',
+ 'fairPlayDrmSupport': 'none',
+ 'playreadyDrmSupport': 'none',
+ 'widevineDRMSupport': 'none'
+ },
+ 'frameRateCapability': [{
+ 'frameRateSupport': '30fps',
+ 'videoQuality': '1440p'
+ }]
+ },
+ 'continueWatchingRequired': False,
+ 'dolby': False,
+ 'downloadRequest': False,
+ 'hevc': False,
+ 'kidsSafe': False,
+ 'manufacturer': 'Windows',
+ 'model': 'Windows',
+ 'multiAudioRequired': True,
+ 'osVersion': '10',
+ 'parentalPinValid': True,
+ 'x-apisignatures': self._API_SIGNATURES
+ })
+
+ status_code = traverse_obj(playback, ('code', {int}))
+ if status_code == 474:
+ self.raise_geo_restricted(countries=['IN'])
+ elif status_code == 1008:
+ error_msg = 'This content is only available for premium users'
+ if self._ACCESS_TOKEN:
+ raise ExtractorError(error_msg, expected=True)
+ self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
+ elif status_code == 400:
+ raise ExtractorError('The requested content is not available', expected=True)
+ elif status_code is not None and status_code != 200:
+ raise ExtractorError(
+ f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')
+
+ metadata = self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
+ video_id, fatal=False, query={
+ 'ids': f'include:{video_id}',
+ 'responseType': 'common',
+ 'devicePlatformType': 'desktop',
+ })
+
+ return {
+ 'id': video_id,
+ 'http_headers': self._API_HEADERS,
+ **self._extract_formats_and_subtitles(playback, video_id),
+ **traverse_obj(playback, ('data', {
+ # fallback metadata
+ 'title': ('name', {str}),
+ 'description': ('fullSynopsis', {str}),
+ 'series': ('show', 'name', {str}, {lambda x: x or None}),
+ 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
+ 'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
+ 'episode': ('fullTitle', {str}),
+ 'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
+ 'age_limit': ('ageNemonic', {parse_age_limit}),
+ 'duration': ('totalDuration', {float_or_none}),
+ 'thumbnail': ('images', {url_or_none}),
+ })),
+ **traverse_obj(metadata, ('result', 0, {
+ 'title': ('fullTitle', {str}),
+ 'description': ('fullSynopsis', {str}),
+ 'series': ('showName', {str}, {lambda x: x or None}),
+ 'season': ('seasonName', {str}, {lambda x: x or None}),
+ 'season_number': ('season', {int_or_none}),
+ 'season_id': ('seasonId', {str}, {lambda x: x or None}),
+ 'episode': ('fullTitle', {str}),
+ 'episode_number': ('episode', {int_or_none}),
+ 'timestamp': ('uploadTime', {int_or_none}),
+ 'release_date': ('telecastDate', {str}),
+ 'age_limit': ('ageNemonic', {parse_age_limit}),
+ 'duration': ('duration', {float_or_none}),
+ 'genres': ('genres', ..., {str}),
+ 'thumbnail': ('seo', 'ogImage', {url_or_none}),
+ })),
+ }
+
+
+class JioCinemaSeriesIE(JioCinemaBaseIE):
+ IE_NAME = 'jiocinema:series'
+ _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
+ 'info_dict': {
+ 'id': '3499917',
+ 'title': 'naagin',
+ },
+ 'playlist_mincount': 120,
+ }]
+
+ def _entries(self, series_id):
+ seasons = self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
+ 'Downloading series metadata JSON', query={
+ 'sort': 'season:asc',
+ 'id': series_id,
+ 'responseType': 'common',
+ })
+
+ for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
+ season_id = season['id']
+ label = season.get('season') or season_num
+ for page_num in itertools.count(1):
+ episodes = traverse_obj(self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
+ season_id, f'Downloading season {label} page {page_num} JSON', query={
+ 'sort': 'episode:asc',
+ 'id': season_id,
+ 'responseType': 'common',
+ 'page': page_num,
+ }), ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
+ if not episodes:
+ break
+ for episode in episodes:
+ yield self.url_result(
+ episode['slug'], JioCinemaIE, **traverse_obj(episode, {
+ 'video_id': 'id',
+ 'video_title': ('fullTitle', {str}),
+ 'season_number': ('season', {int_or_none}),
+ 'episode_number': ('episode', {int_or_none}),
+ }))
+
+ def _real_extract(self, url):
+ slug, series_id = self._match_valid_url(url).group('slug', 'id')
+ return self.playlist_result(self._entries(series_id), series_id, slug)
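Note: _is_token_expired decodes the JWT payload without verifying it and compares the exp claim against the current time with a 180-second margin. Sketch using yt-dlp's real helpers and a locally built throwaway token:

    import base64
    import json
    import time

    from yt_dlp.utils import jwt_decode_hs256, try_call

    def b64url(data):
        return base64.urlsafe_b64encode(data).rstrip(b'=')

    header = b64url(json.dumps({'alg': 'HS256', 'typ': 'JWT'}).encode())
    payload = b64url(json.dumps({'exp': int(time.time()) - 3600}).encode())
    token = b'.'.join((header, payload, b64url(b'sig'))).decode()

    def is_token_expired(token):
        return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)

    print(is_token_expired(token))  # True: exp is an hour in the past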
diff --git a/yt_dlp/extractor/jiosaavn.py b/yt_dlp/extractor/jiosaavn.py
index d7f0a2d..35fb3fd 100644
--- a/yt_dlp/extractor/jiosaavn.py
+++ b/yt_dlp/extractor/jiosaavn.py
@@ -1,10 +1,12 @@
import functools
+import math
+import re
from .common import InfoExtractor
from ..utils import (
- format_field,
+ InAdvancePagedList,
+ clean_html,
int_or_none,
- js_to_json,
make_archive_id,
smuggle_url,
unsmuggle_url,
@@ -16,6 +18,7 @@ from ..utils.traversal import traverse_obj
class JioSaavnBaseIE(InfoExtractor):
+ _API_URL = 'https://www.jiosaavn.com/api.php'
_VALID_BITRATES = {'16', '32', '64', '128', '320'}
@functools.cached_property
@@ -30,7 +33,7 @@ class JioSaavnBaseIE(InfoExtractor):
def _extract_formats(self, song_data):
for bitrate in self.requested_bitrates:
media_data = self._download_json(
- 'https://www.jiosaavn.com/api.php', song_data['id'],
+ self._API_URL, song_data['id'],
f'Downloading format info for {bitrate}',
fatal=False, data=urlencode_postdata({
'__call': 'song.generateAuthToken',
@@ -50,31 +53,45 @@ class JioSaavnBaseIE(InfoExtractor):
'vcodec': 'none',
}
- def _extract_song(self, song_data):
+ def _extract_song(self, song_data, url=None):
info = traverse_obj(song_data, {
'id': ('id', {str}),
- 'title': ('title', 'text', {str}),
- 'album': ('album', 'text', {str}),
- 'thumbnail': ('image', 0, {url_or_none}),
+ 'title': ('song', {clean_html}),
+ 'album': ('album', {clean_html}),
+ 'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
'duration': ('duration', {int_or_none}),
'view_count': ('play_count', {int_or_none}),
'release_year': ('year', {int_or_none}),
- 'artists': ('artists', lambda _, v: v['role'] == 'singer', 'name', {str}),
- 'webpage_url': ('perma_url', {url_or_none}), # for song, playlist extraction
+ 'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
+ 'webpage_url': ('perma_url', {url_or_none}),
})
- if not info.get('webpage_url'): # for album extraction / fallback
- info['webpage_url'] = format_field(
- song_data, [('title', 'action')], 'https://www.jiosaavn.com%s') or None
- if webpage_url := info['webpage_url']:
- info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, url_basename(webpage_url))]
+ if webpage_url := info.get('webpage_url') or url:
+ info['display_id'] = url_basename(webpage_url)
+ info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
return info
- def _extract_initial_data(self, url, display_id):
- webpage = self._download_webpage(url, display_id)
- return self._search_json(
- r'window\.__INITIAL_DATA__\s*=', webpage,
- 'initial data', display_id, transform_source=js_to_json)
+ def _call_api(self, type_, token, note='API', params={}):
+ return self._download_json(
+ self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
+ query={
+ '__call': 'webapi.get',
+ '_format': 'json',
+ '_marker': '0',
+ 'ctx': 'web6dot0',
+ 'token': token,
+ 'type': type_,
+ **params,
+ })
+
+ def _yield_songs(self, playlist_data):
+ for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
+ song_info = self._extract_song(song_data)
+ url = smuggle_url(song_info['webpage_url'], {
+ 'id': song_data['id'],
+ 'encrypted_media_url': song_data['encrypted_media_url'],
+ })
+ yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
class JioSaavnSongIE(JioSaavnBaseIE):
@@ -85,10 +102,11 @@ class JioSaavnSongIE(JioSaavnBaseIE):
'md5': '3b84396d15ed9e083c3106f1fa589c04',
'info_dict': {
'id': 'IcoLuefJ',
+ 'display_id': 'OQsEfQFVUXk',
'ext': 'm4a',
'title': 'Leja Re',
'album': 'Leja Re',
- 'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
+ 'thumbnail': r're:https?://c\.saavncdn\.com/258/Leja-Re-Hindi-2018-20181124024539-500x500\.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
@@ -111,8 +129,8 @@ class JioSaavnSongIE(JioSaavnBaseIE):
result = {'id': song_data['id']}
else:
# only extract metadata if this is not a url_transparent result
- song_data = self._extract_initial_data(url, self._match_id(url))['song']['song']
- result = self._extract_song(song_data)
+ song_data = self._call_api('song', self._match_id(url))['songs'][0]
+ result = self._extract_song(song_data, url)
result['formats'] = list(self._extract_formats(song_data))
return result
@@ -130,19 +148,12 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
'playlist_count': 10,
}]
- def _entries(self, playlist_data):
- for song_data in traverse_obj(playlist_data, (
- 'modules', lambda _, x: x['key'] == 'list', 'data', lambda _, v: v['title']['action'])):
- song_info = self._extract_song(song_data)
- # album song data is missing artists and release_year, need to re-extract metadata
- yield self.url_result(song_info['webpage_url'], JioSaavnSongIE, **song_info)
-
def _real_extract(self, url):
display_id = self._match_id(url)
- album_data = self._extract_initial_data(url, display_id)['albumView']
+ album_data = self._call_api('album', display_id)
return self.playlist_result(
- self._entries(album_data), display_id, traverse_obj(album_data, ('album', 'title', 'text', {str})))
+ self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
class JioSaavnPlaylistIE(JioSaavnBaseIE):
@@ -154,21 +165,30 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
'id': 'LlJ8ZWT1ibN5084vKHRj2Q__',
'title': 'Mood English',
},
- 'playlist_mincount': 50,
+ 'playlist_mincount': 301,
+ }, {
+ 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-hindi/DVR,pFUOwyXqIp77B1JF,A__',
+ 'info_dict': {
+ 'id': 'DVR,pFUOwyXqIp77B1JF,A__',
+ 'title': 'Mood Hindi',
+ },
+ 'playlist_mincount': 801,
}]
+ _PAGE_SIZE = 50
- def _entries(self, playlist_data):
- for song_data in traverse_obj(playlist_data, ('list', lambda _, v: v['perma_url'])):
- song_info = self._extract_song(song_data)
- url = smuggle_url(song_info['webpage_url'], {
- 'id': song_data['id'],
- 'encrypted_media_url': song_data['encrypted_media_url'],
- })
- yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
+ def _fetch_page(self, token, page):
+ return self._call_api(
+ 'playlist', token, f'playlist page {page}', {'p': page, 'n': self._PAGE_SIZE})
+
+ def _entries(self, token, first_page_data, page):
+ page_data = first_page_data if not page else self._fetch_page(token, page + 1)
+ yield from self._yield_songs(page_data)
def _real_extract(self, url):
display_id = self._match_id(url)
- playlist_data = self._extract_initial_data(url, display_id)['playlist']['playlist']
+ playlist_data = self._fetch_page(display_id, 1)
+ total_pages = math.ceil(int(playlist_data['list_count']) / self._PAGE_SIZE)
- return self.playlist_result(
- self._entries(playlist_data), display_id, traverse_obj(playlist_data, ('title', 'text', {str})))
+ return self.playlist_result(InAdvancePagedList(
+ functools.partial(self._entries, display_id, playlist_data),
+ total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
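
Note on the jiosaavn playlist change above: page 1 is fetched eagerly to learn list_count, then the remaining pages are handed to InAdvancePagedList(page_func, total_pages, page_size) so they are only requested when their entries are consumed. Below is a minimal stdlib-only sketch of that paging math; fetch_page stands in for _call_api('playlist', ...) and FAKE_TOTAL is invented test data:

    import math

    PAGE_SIZE = 50
    FAKE_TOTAL = 120  # invented: pretend the playlist holds 120 songs

    def fetch_page(token, page):
        # stand-in for _call_api('playlist', token, ..., {'p': page, 'n': PAGE_SIZE});
        # the real endpoint returns 'list_count' plus up to PAGE_SIZE songs per page
        start = (page - 1) * PAGE_SIZE
        songs = [f'{token}-song-{i}' for i in range(start, min(start + PAGE_SIZE, FAKE_TOTAL))]
        return {'list_count': FAKE_TOTAL, 'songs': songs}

    def iter_playlist(token):
        first = fetch_page(token, 1)  # eager fetch, as in _real_extract above
        total_pages = math.ceil(int(first['list_count']) / PAGE_SIZE)
        for page in range(total_pages):
            # page 0 reuses the eager result; later pages go back to the API,
            # which is the branch _entries() takes for page > 0
            data = first if page == 0 else fetch_page(token, page + 1)
            yield from data['songs']

    assert len(list(iter_playlist('DVR'))) == FAKE_TOTAL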
diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py
index 245fe73..8069fea 100644
--- a/yt_dlp/extractor/jove.py
+++ b/yt_dlp/extractor/jove.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- unified_strdate
-)
+from ..utils import ExtractorError, unified_strdate
class JoveIE(InfoExtractor):
diff --git a/yt_dlp/extractor/jstream.py b/yt_dlp/extractor/jstream.py
index 3e2e627..00ac7cc 100644
--- a/yt_dlp/extractor/jstream.py
+++ b/yt_dlp/extractor/jstream.py
@@ -1,6 +1,6 @@
import base64
-import re
import json
+import re
from .common import InfoExtractor
from ..utils import (
diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py
index 43055e8..563aa2d 100644
--- a/yt_dlp/extractor/kakao.py
+++ b/yt_dlp/extractor/kakao.py
@@ -3,8 +3,8 @@ from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
- strip_or_none,
str_or_none,
+ strip_or_none,
traverse_obj,
unified_timestamp,
)
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index 95e2dee..4752d5a 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -4,18 +4,18 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_parse_qs,
+ compat_urlparse,
)
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
format_field,
int_or_none,
- unsmuggle_url,
+ remove_start,
smuggle_url,
traverse_obj,
- remove_start
+ unsmuggle_url,
)
diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py
index 8f247b3..3d74c74 100644
--- a/yt_dlp/extractor/kankanews.py
+++ b/yt_dlp/extractor/kankanews.py
@@ -1,7 +1,7 @@
-import time
+import hashlib
import random
import string
-import hashlib
+import time
import urllib.parse
from .common import InfoExtractor
diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py
index 3c93ded..b776671 100644
--- a/yt_dlp/extractor/kuwo.py
+++ b/yt_dlp/extractor/kuwo.py
@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- get_element_by_id,
- clean_html,
ExtractorError,
InAdvancePagedList,
+ clean_html,
+ get_element_by_id,
remove_start,
)
diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py
index e7d2f8a..708cb54 100644
--- a/yt_dlp/extractor/lci.py
+++ b/yt_dlp/extractor/lci.py
@@ -1,9 +1,25 @@
from .common import InfoExtractor
+from .wat import WatIE
+from ..utils import ExtractorError, int_or_none
+from ..utils.traversal import traverse_obj
class LCIIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html'
_TESTS = [{
+ 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html',
+ 'info_dict': {
+ 'id': '14113788',
+ 'ext': 'mp4',
+ 'title': '24H Pujadas du vendredi 24 mai 2024',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg',
+ 'upload_date': '20240524',
+ 'duration': 6158,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
'info_dict': {
'id': '13875948',
@@ -24,5 +40,10 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
- return self.url_result('wat:' + wat_id, 'Wat', wat_id)
+ next_data = self._search_nextjs_data(webpage, video_id)
+ wat_id = traverse_obj(next_data, (
+ 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any))
+ if wat_id is None:
+ raise ExtractorError('Could not find wat_id')
+
+ return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id))
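Note on the lci.py change above: the WAT id now comes from the Next.js payload, where {dict.keys} projects the videos mapping onto its keys, ... iterates them, {int_or_none} keeps the numeric ones, and any takes the first hit. A plain-Python sketch of that traversal, with int_or_none reimplemented for self-containment and the payload trimmed to the traversed keys (shape assumed from the test above):

    def int_or_none(v):
        # same contract as yt_dlp.utils.int_or_none: int when convertible, else None
        try:
            return int(v)
        except (TypeError, ValueError):
            return None

    next_data = {'props': {'pageProps': {'page': {'tms': {'videos': {
        '14113788': {'title': '24H Pujadas du vendredi 24 mai 2024'},
    }}}}}}

    videos = next_data['props']['pageProps']['page']['tms']['videos']
    # equivalent of the path ('videos', {dict.keys}, ..., {int_or_none}, any)
    wat_id = next((i for i in map(int_or_none, videos) if i is not None), None)
    assert wat_id == 14113788  # then handed on as f'wat:{wat_id}' to WatIE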
diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py
index 9846319..6287419 100644
--- a/yt_dlp/extractor/lcp.py
+++ b/yt_dlp/extractor/lcp.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .arkena import ArkenaIE
+from .common import InfoExtractor
class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE
diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py
index 10fb5d4..1a3ada1 100644
--- a/yt_dlp/extractor/lecture2go.py
+++ b/yt_dlp/extractor/lecture2go.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
determine_protocol,
- parse_duration,
int_or_none,
+ parse_duration,
)
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py
index 629d208..90f0268 100644
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
float_or_none,
int_or_none,
str_or_none,
diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py
index 5d61a60..a113b3d 100644
--- a/yt_dlp/extractor/leeco.py
+++ b/yt_dlp/extractor/leeco.py
@@ -11,9 +11,9 @@ from ..compat import (
compat_urllib_parse_urlencode,
)
from ..utils import (
+ ExtractorError,
determine_ext,
encode_data_uri,
- ExtractorError,
int_or_none,
orderedSet,
parse_iso8601,
diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py
index b76ca09..2979939 100644
--- a/yt_dlp/extractor/libraryofcongress.py
+++ b/yt_dlp/extractor/libraryofcongress.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
determine_ext,
float_or_none,
diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py
index 919cfcb..ea150a5 100644
--- a/yt_dlp/extractor/lifenews.py
+++ b/yt_dlp/extractor/lifenews.py
@@ -6,8 +6,8 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_iso8601,
remove_end,
diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py
index 4e50f10..1ff091d 100644
--- a/yt_dlp/extractor/limelight.py
+++ b/yt_dlp/extractor/limelight.py
@@ -3,13 +3,13 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
determine_ext,
float_or_none,
int_or_none,
smuggle_url,
try_get,
unsmuggle_url,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index e12f467..2a7c6f0 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -7,8 +7,8 @@ from ..utils import (
extract_attributes,
float_or_none,
int_or_none,
- srt_subtitles_timecode,
mimetype2ext,
+ srt_subtitles_timecode,
traverse_obj,
try_get,
url_or_none,
diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py
index fd9bba8..fa12a6a 100644
--- a/yt_dlp/extractor/mainstreaming.py
+++ b/yt_dlp/extractor/mainstreaming.py
@@ -1,14 +1,13 @@
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
js_to_json,
parse_duration,
traverse_obj,
try_get,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py
index 2792e6e..44c321c 100644
--- a/yt_dlp/extractor/manoto.py
+++ b/yt_dlp/extractor/manoto.py
@@ -1,10 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- traverse_obj
-)
-
+from ..utils import clean_html, int_or_none, traverse_obj
_API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}'
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
index 675ad8c..d040fb4 100644
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
- format_field,
float_or_none,
+ format_field,
int_or_none,
str_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
index fcc4827..c015977 100644
--- a/yt_dlp/extractor/mediaklikk.py
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -1,14 +1,11 @@
+from .common import InfoExtractor
+from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
traverse_obj,
unified_strdate,
url_or_none,
)
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_str
-)
class MediaKlikkIE(InfoExtractor):
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
index e04a1ce..b7df5c7 100644
--- a/yt_dlp/extractor/mediaset.py
+++ b/yt_dlp/extractor/mediaset.py
@@ -5,11 +5,11 @@ from .theplatform import ThePlatformBaseIE
from ..utils import (
ExtractorError,
GeoRestrictedError,
- int_or_none,
OnDemandPagedList,
+ int_or_none,
try_get,
- urljoin,
update_url_query,
+ urljoin,
)
diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py
index 7ea78ab..d3fec4e 100644
--- a/yt_dlp/extractor/mediasite.py
+++ b/yt_dlp/extractor/mediasite.py
@@ -1,5 +1,5 @@
-import re
import json
+import re
from .common import InfoExtractor
from ..compat import (
@@ -10,16 +10,15 @@ from ..utils import (
ExtractorError,
float_or_none,
mimetype2ext,
+ smuggle_url,
str_or_none,
try_call,
try_get,
- smuggle_url,
unsmuggle_url,
url_or_none,
urljoin,
)
-
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py
index 5f5f160..f6a0b41 100644
--- a/yt_dlp/extractor/microsoftstream.py
+++ b/yt_dlp/extractor/microsoftstream.py
@@ -3,8 +3,8 @@ import base64
from .common import InfoExtractor
from ..utils import (
merge_dicts,
- parse_iso8601,
parse_duration,
+ parse_iso8601,
parse_resolution,
try_get,
url_basename,
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index f64d575..caf60c8 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -4,11 +4,11 @@ import uuid
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
determine_ext,
dict_get,
- ExtractorError,
float_or_none,
- OnDemandPagedList,
traverse_obj,
)
diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py
index 38cc0c2..979584e 100644
--- a/yt_dlp/extractor/mit.py
+++ b/yt_dlp/extractor/mit.py
@@ -1,11 +1,11 @@
-import re
import json
+import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
get_element_by_id,
)
diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py
index b980fd0..58c4a23 100644
--- a/yt_dlp/extractor/mixch.py
+++ b/yt_dlp/extractor/mixch.py
@@ -1,6 +1,12 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
-from ..utils import ExtractorError, UserNotLive, int_or_none, url_or_none
+from ..utils import (
+ ExtractorError,
+ UserNotLive,
+ int_or_none,
+ str_or_none,
+ url_or_none,
+)
from ..utils.traversal import traverse_obj
@@ -9,17 +15,20 @@ class MixchIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://mixch.tv/u/16236849/live',
+ 'url': 'https://mixch.tv/u/16943797/live',
'skip': 'don\'t know if this live persists',
'info_dict': {
- 'id': '16236849',
- 'title': '24配信シェア⭕️投票🙏💦',
- 'comment_count': 13145,
- 'view_count': 28348,
- 'timestamp': 1636189377,
- 'uploader': '🦥伊咲👶🏻#フレアワ',
- 'uploader_id': '16236849',
- }
+ 'id': '16943797',
+ 'ext': 'mp4',
+ 'title': '#EntView #カリナ #セブチ 2024-05-05 06:58',
+ 'comment_count': int,
+ 'view_count': int,
+ 'timestamp': 1714726805,
+ 'uploader': 'Ent.View K-news🎶💕',
+ 'uploader_id': '16943797',
+ 'live_status': 'is_live',
+ 'upload_date': '20240503',
+ },
}, {
'url': 'https://mixch.tv/u/16137876/live',
'only_matching': True,
@@ -48,8 +57,20 @@ class MixchIE(InfoExtractor):
'protocol': 'm3u8',
}],
'is_live': True,
+ '__post_extractor': self.extract_comments(video_id),
}
+ def _get_comments(self, video_id):
+ yield from traverse_obj(self._download_json(
+ f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id,
+ note='Downloading comments', errnote='Failed to download comments'), (..., {
+ 'author': ('name', {str}),
+ 'author_id': ('user_id', {str_or_none}),
+ 'id': ('message_id', {str}, {lambda x: x or None}),
+ 'text': ('body', {str}),
+ 'timestamp': ('created', {int}),
+ }))
+
class MixchArchiveIE(InfoExtractor):
IE_NAME = 'mixch:archive'
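
Note on the mixch.py change above: the __post_extractor hook defers comment fetching until after the live formats are resolved, and _get_comments flattens each message object into yt-dlp's comment schema. A standalone sketch of that mapping, with the API response shape assumed from the field names in the hunk:

    def str_or_none(v):
        return str(v) if v is not None else None

    messages = [  # assumed shape of /api-web/lives/<id>/messages
        {'name': 'viewer1', 'user_id': 123, 'message_id': 'm1', 'body': 'hi', 'created': 1714726900},
        {'name': 'viewer2', 'user_id': 456, 'message_id': '', 'body': 'hello', 'created': 1714726905},
    ]

    def get_comments(messages):
        # plain-Python rendering of the traverse_obj template in _get_comments
        for m in messages:
            yield {
                'author': m.get('name'),
                'author_id': str_or_none(m.get('user_id')),
                'id': m.get('message_id') or None,  # the {lambda x: x or None} step
                'text': m.get('body'),
                'timestamp': m.get('created'),
            }

    for comment in get_comments(messages):
        print(comment['author'], comment['text'])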
diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py
index a69a12e..411d41c 100644
--- a/yt_dlp/extractor/monstercat.py
+++ b/yt_dlp/extractor/monstercat.py
@@ -8,10 +8,10 @@ from ..utils import (
get_element_html_by_class,
get_element_text_and_html_by_tag,
int_or_none,
- unified_strdate,
strip_or_none,
traverse_obj,
try_call,
+ unified_strdate,
)
diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py
index 668c098..ed5be4f 100644
--- a/yt_dlp/extractor/moviepilot.py
+++ b/yt_dlp/extractor/moviepilot.py
@@ -1,5 +1,5 @@
-from .dailymotion import DailymotionIE
from .common import InfoExtractor
+from .dailymotion import DailymotionIE
class MoviepilotIE(InfoExtractor):
@@ -14,7 +14,7 @@ class MoviepilotIE(InfoExtractor):
'display_id': 'interstellar-2',
'ext': 'mp4',
'title': 'Interstellar',
- 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1ZganMw4HVXg/x1080',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1.*/x1080',
'timestamp': 1605010596,
'description': 'md5:0ae9cb452af52610c9ffc60f2fd0474c',
'uploader': 'Moviepilot',
@@ -71,7 +71,7 @@ class MoviepilotIE(InfoExtractor):
'age_limit': 0,
'duration': 82,
'upload_date': '20201109',
- 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Zg3lxLv9j5u/x1080',
+ 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Z.*/x1080',
'uploader': 'Moviepilot',
'like_count': int,
'view_count': int,
@@ -92,6 +92,6 @@ class MoviepilotIE(InfoExtractor):
'ie_key': DailymotionIE.ie_key(),
'display_id': video_id,
'title': clip.get('title'),
- 'url': f'https://www.dailymotion.com/video/{clip["videoRemoteId"]}',
+ 'url': f'https://www.dailymotion.com/video/{clip["video"]["remoteId"]}',
'description': clip.get('summary'),
}
diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py
index cdd8ba4..6e0ea26 100644
--- a/yt_dlp/extractor/movingimage.py
+++ b/yt_dlp/extractor/movingimage.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- unescapeHTML,
parse_duration,
+ unescapeHTML,
)
diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py
index 77d1806..79728e1 100644
--- a/yt_dlp/extractor/msn.py
+++ b/yt_dlp/extractor/msn.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py
index edc4144..8a8a5fe 100644
--- a/yt_dlp/extractor/n1.py
+++ b/yt_dlp/extractor/n1.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- unified_timestamp,
extract_attributes,
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py
index 885557e..26400e3 100644
--- a/yt_dlp/extractor/naver.py
+++ b/yt_dlp/extractor/naver.py
@@ -4,8 +4,8 @@ import hmac
import itertools
import json
import re
-import urllib.parse
import time
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py
index 81d11e3..ec4d636 100644
--- a/yt_dlp/extractor/nba.py
+++ b/yt_dlp/extractor/nba.py
@@ -7,9 +7,9 @@ from ..compat import (
compat_urllib_parse_unquote,
)
from ..utils import (
+ OnDemandPagedList,
int_or_none,
merge_dicts,
- OnDemandPagedList,
parse_duration,
parse_iso8601,
parse_qs,
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index 267fa83..e88f98a 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -3,9 +3,9 @@ import json
import re
import xml.etree.ElementTree
+from .adobepass import AdobePassIE
from .common import InfoExtractor
from .theplatform import ThePlatformIE, default_ns
-from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest
from ..utils import (
diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
index 41ea362..243221d 100644
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
merge_dicts,
parse_iso8601,
diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py
index 73b33a9..b54c12e 100644
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -561,7 +561,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'timestamp': ('createTime', {self.kilo_or_none}),
})
- if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
+ if not self._yes_playlist(
+ info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
formats = self.extract_formats(info['mainSong'])
return {
diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py
index 6f78728..968c972 100644
--- a/yt_dlp/extractor/nfb.py
+++ b/yt_dlp/extractor/nfb.py
@@ -5,7 +5,6 @@ from ..utils import (
merge_dicts,
parse_count,
url_or_none,
- urljoin,
)
from ..utils.traversal import traverse_obj
@@ -16,8 +15,7 @@ class NFBBaseIE(InfoExtractor):
def _extract_ep_data(self, webpage, video_id, fatal=False):
return self._search_json(
- r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
- contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
+ r'episodesData\s*:', webpage, 'episode data', video_id, fatal=fatal) or {}
def _extract_ep_info(self, data, video_id, slug=None):
info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
@@ -224,18 +222,14 @@ class NFBIE(NFBBaseIE):
# type_ can change from film to serie(s) after redirect; new slug may have episode number
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
- embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
- r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
- video_id = self._match_id(embed_url) # embed url has unique slug
- player = self._download_webpage(embed_url, video_id, 'Downloading player page')
- if 'MESSAGE_GEOBLOCKED' in player:
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ player_data = self._search_json(
+ r'window\.PLAYER_OPTIONS\[[^\]]+\]\s*=', webpage, 'player data', slug)
+ video_id = self._match_id(player_data['overlay']['url']) # overlay url always has unique slug
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
- video_id, 'mp4', m3u8_id='hls')
+ player_data['source'], video_id, 'mp4', m3u8_id='hls')
- if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
+ if dv_source := url_or_none(player_data.get('dvSource')):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
for fmt in fmts:
@@ -246,17 +240,16 @@ class NFBIE(NFBBaseIE):
info = {
'id': video_id,
'title': self._html_search_regex(
- r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
+ r'["\']nfb_version_title["\']\s*:\s*["\']([^"\']+)',
webpage, 'title', default=None),
'description': self._html_search_regex(
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
webpage, 'description', default=None),
- 'thumbnail': self._html_search_regex(
- r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
+ 'thumbnail': url_or_none(player_data.get('poster')),
'uploader': self._html_search_regex(
- r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
+ r'<[^>]+\bitemprop=["\']director["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
'release_year': int_or_none(self._html_search_regex(
- r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
+ r'["\']nfb_version_year["\']\s*:\s*["\']([^"\']+)',
webpage, 'release_year', default=None)),
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
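
Note on the nfb.py change above: the second player-page request is gone because the HLS source, described-video source, poster, and overlay URL all come from one window.PLAYER_OPTIONS[...] object in the page. A sketch of pulling that object out with a regex anchor plus a JSON raw_decode, roughly what InfoExtractor._search_json does with balanced-brace matching; the markup here is invented:

    import json
    import re

    webpage = '''<script>
    window.PLAYER_OPTIONS["player-0"] = {"source": "https://example.com/master.m3u8",
     "poster": "https://example.com/poster.jpg",
     "overlay": {"url": "https://www.nfb.ca/film/some-unique-slug/embed/player/"}};
    </script>'''  # invented markup; the key names match the hunk above

    anchor = re.search(r'window\.PLAYER_OPTIONS\[[^\]]+\]\s*=', webpage)
    # decode exactly one JSON object starting at the first brace after the anchor
    start = webpage.index('{', anchor.end())
    player_data, _ = json.JSONDecoder().raw_decode(webpage[start:])

    print(player_data['source'])          # m3u8 url fed to the HLS extractor
    print(player_data['overlay']['url'])  # overlay url whose slug becomes video_id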
diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py
index febad8f..be732a3 100644
--- a/yt_dlp/extractor/nfhsnetwork.py
+++ b/yt_dlp/extractor/nfhsnetwork.py
@@ -1,11 +1,5 @@
from .common import InfoExtractor
-
-
-from ..utils import (
- try_get,
- unified_strdate,
- unified_timestamp
-)
+from ..utils import try_get, unified_strdate, unified_timestamp
class NFHSNetworkIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py
index 2521c40..64cddb4 100644
--- a/yt_dlp/extractor/nhl.py
+++ b/yt_dlp/extractor/nhl.py
@@ -3,8 +3,8 @@ from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
- parse_iso8601,
parse_duration,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/ninenews.py b/yt_dlp/extractor/ninenews.py
index 900d9ba..0b4f47b 100644
--- a/yt_dlp/extractor/ninenews.py
+++ b/yt_dlp/extractor/ninenews.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
from ..utils import ExtractorError
from ..utils.traversal import traverse_obj
diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py
index c655b75..b7170b0 100644
--- a/yt_dlp/extractor/ninenow.py
+++ b/yt_dlp/extractor/ninenow.py
@@ -2,8 +2,8 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
+ int_or_none,
smuggle_url,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py
index 35d1311..249e7cd 100644
--- a/yt_dlp/extractor/nitter.py
+++ b/yt_dlp/extractor/nitter.py
@@ -1,13 +1,14 @@
+import random
+import re
+
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ determine_ext,
parse_count,
- unified_timestamp,
remove_end,
- determine_ext,
+ unified_timestamp,
)
-import re
-import random
class NitterIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py
index cddc72f..513529b 100644
--- a/yt_dlp/extractor/nobelprize.py
+++ b/yt_dlp/extractor/nobelprize.py
@@ -1,11 +1,11 @@
from .common import InfoExtractor
from ..utils import (
- js_to_json,
- mimetype2ext,
determine_ext,
- update_url_query,
get_element_by_attribute,
int_or_none,
+ js_to_json,
+ mimetype2ext,
+ update_url_query,
)
diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py
index c7b8038..19cb972 100644
--- a/yt_dlp/extractor/noz.py
+++ b/yt_dlp/extractor/noz.py
@@ -1,11 +1,11 @@
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
- int_or_none,
find_xpath_attr,
- xpath_text,
+ int_or_none,
update_url_query,
+ xpath_text,
)
-from ..compat import compat_urllib_parse_unquote
class NozIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nts.py b/yt_dlp/extractor/nts.py
new file mode 100644
index 0000000..a801740
--- /dev/null
+++ b/yt_dlp/extractor/nts.py
@@ -0,0 +1,76 @@
+from .common import InfoExtractor
+from ..utils import parse_iso8601, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class NTSLiveIE(InfoExtractor):
+ IE_NAME = 'nts.live'
+ _VALID_URL = r'https?://(?:www\.)?nts\.live/shows/[^/?#]+/episodes/(?P<id>[^/?#]+)'
+ _TESTS = [
+ {
+ # embedded soundcloud
+ 'url': 'https://www.nts.live/shows/yu-su/episodes/yu-su-2nd-april-2024',
+ 'md5': 'b5444c04888c869d68758982de1a27d8',
+ 'info_dict': {
+ 'id': '1791563518',
+ 'ext': 'opus',
+ 'uploader_id': '995579326',
+ 'title': 'Pender Street Steppers & YU SU',
+ 'timestamp': 1712073600,
+ 'upload_date': '20240402',
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-qKcNO0z0AQGGbv9s-GljJCw-original.jpg',
+ 'license': 'all-rights-reserved',
+ 'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/user-643553014',
+ 'uploader': 'NTS Latest',
+ 'description': 'md5:cd00ac535a63caaad722483ae3ff802a',
+ 'duration': 10784.157,
+ 'genres': ['Deep House', 'House', 'Leftfield Disco', 'Jazz Fusion', 'Dream Pop'],
+ 'modified_timestamp': 1712564687,
+ 'modified_date': '20240408',
+ },
+ },
+ {
+ # embedded mixcloud
+ 'url': 'https://www.nts.live/shows/absolute-fiction/episodes/absolute-fiction-23rd-july-2022',
+ 'info_dict': {
+ 'id': 'NTSRadio_absolute-fiction-23rd-july-2022',
+ 'ext': 'webm',
+ 'like_count': int,
+ 'title': 'Absolute Fiction',
+ 'comment_count': int,
+ 'uploader_url': 'https://www.mixcloud.com/NTSRadio/',
+ 'description': 'md5:ba49da971ae8d71ee45813c52c5e2a04',
+ 'tags': [],
+ 'duration': 3529,
+ 'timestamp': 1658588400,
+ 'repost_count': int,
+ 'upload_date': '20220723',
+ 'uploader_id': 'NTSRadio',
+ 'thumbnail': 'https://thumbnailer.mixcloud.com/unsafe/1024x1024/extaudio/5/1/a/d/ae3e-1be9-4fd4-983e-9c3294226eac',
+ 'uploader': 'Mixcloud NTS Radio',
+ 'genres': ['Minimal Synth', 'Post Punk', 'Industrial '],
+ 'modified_timestamp': 1658842165,
+ 'modified_date': '20220726',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._search_json(r'window\._REACT_STATE_\s*=', webpage, 'react state', video_id)
+
+ return {
+ '_type': 'url_transparent',
+ **traverse_obj(data, ('episode', {
+ 'url': ('audio_sources', ..., 'url', {url_or_none}, any),
+ 'title': ('name', {str}),
+ 'description': ('description', {str}),
+ 'genres': ('genres', ..., 'value', {str}),
+ 'timestamp': ('broadcast', {parse_iso8601}),
+ 'modified_timestamp': ('updated', {parse_iso8601}),
+ })),
+ }
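
Note on the new nts.py extractor above: it is a thin redirector that parses window._REACT_STATE_, takes the first valid audio_sources URL, and returns it url_transparent so the embedded SoundCloud/Mixcloud extractor supplies the formats while NTS-side metadata fills the rest. A plain-Python sketch of the ('audio_sources', ..., 'url', {url_or_none}, any) selection, against a trimmed, assumed payload:

    react_state = {'episode': {  # assumed shape of window._REACT_STATE_
        'name': 'Pender Street Steppers & YU SU',
        'audio_sources': [
            {'url': None},  # dropped by the {url_or_none} step
            {'url': 'https://api.soundcloud.com/tracks/1791563518'},
        ],
    }}

    episode = react_state['episode']
    # the first entry with a valid http(s) url wins, mirroring the `any` step
    audio_url = next(
        (s['url'] for s in episode['audio_sources']
         if isinstance(s.get('url'), str) and s['url'].startswith(('http://', 'https://'))),
        None)
    assert audio_url == 'https://api.soundcloud.com/tracks/1791563518'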
diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py
index ec54041..5670445 100644
--- a/yt_dlp/extractor/nuevo.py
+++ b/yt_dlp/extractor/nuevo.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-
-from ..utils import (
- float_or_none,
- xpath_text
-)
+from ..utils import float_or_none, xpath_text
class NuevoBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py
index 6ac351c..0ef0ec7 100644
--- a/yt_dlp/extractor/nuvid.py
+++ b/yt_dlp/extractor/nuvid.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- parse_duration,
int_or_none,
+ parse_duration,
strip_or_none,
traverse_obj,
url_or_none,
diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py
index 062f9a8..0a12aea 100644
--- a/yt_dlp/extractor/nzherald.py
+++ b/yt_dlp/extractor/nzherald.py
@@ -3,10 +3,7 @@ import json
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- traverse_obj
-)
+from ..utils import ExtractorError, traverse_obj
class NZHeraldIE(InfoExtractor):
diff --git a/yt_dlp/extractor/odkmedia.py b/yt_dlp/extractor/odkmedia.py
index b852160..8321b07 100644
--- a/yt_dlp/extractor/odkmedia.py
+++ b/yt_dlp/extractor/odkmedia.py
@@ -7,7 +7,7 @@ from ..utils import (
GeoRestrictedError,
float_or_none,
traverse_obj,
- try_call
+ try_call,
)
diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py
index 61d1f40..5507d2f 100644
--- a/yt_dlp/extractor/olympics.py
+++ b/yt_dlp/extractor/olympics.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- try_get
-)
+from ..utils import int_or_none, try_get
class OlympicsReplayIE(InfoExtractor):
diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py
index a46211e..351b397 100644
--- a/yt_dlp/extractor/onenewsnz.py
+++ b/yt_dlp/extractor/onenewsnz.py
@@ -1,10 +1,6 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
-
-from ..utils import (
- ExtractorError,
- traverse_obj
-)
+from ..utils import ExtractorError, traverse_obj
class OneNewsNZIE(InfoExtractor):
diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py
index 0d59e8c..da10f37 100644
--- a/yt_dlp/extractor/onet.py
+++ b/yt_dlp/extractor/onet.py
@@ -2,13 +2,13 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
+ NO_DEFAULT,
ExtractorError,
+ determine_ext,
float_or_none,
get_element_by_class,
int_or_none,
js_to_json,
- NO_DEFAULT,
parse_iso8601,
remove_start,
strip_or_none,
diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py
index 1fafd9a..12bf557 100644
--- a/yt_dlp/extractor/opencast.py
+++ b/yt_dlp/extractor/opencast.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_iso8601,
traverse_obj,
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index 82a81c6..c9a96ae 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -1,4 +1,5 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
get_first,
@@ -8,7 +9,6 @@ from ..utils import (
unified_strdate,
unified_timestamp,
)
-from ..compat import compat_str
class OpenRecBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py
index d49909d..0e7a848 100644
--- a/yt_dlp/extractor/ora.py
+++ b/yt_dlp/extractor/ora.py
@@ -1,4 +1,5 @@
import re
+
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py
index 526e9ac..3c837be 100644
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -3,203 +3,24 @@ import functools
import re
from .common import InfoExtractor
-from ..networking import HEADRequest
from ..utils import (
- InAdvancePagedList,
clean_html,
determine_ext,
float_or_none,
int_or_none,
- join_nonempty,
make_archive_id,
mimetype2ext,
orderedSet,
+ parse_age_limit,
remove_end,
- smuggle_url,
strip_jsonp,
try_call,
- unescapeHTML,
unified_strdate,
- unsmuggle_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
-class ORFTVthekIE(InfoExtractor):
- IE_NAME = 'orf:tvthek'
- IE_DESC = 'ORF TVthek'
- _VALID_URL = r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])'
-
- _TESTS = [{
- 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079',
- 'info_dict': {
- 'id': '14121079',
- },
- 'playlist_count': 11,
- 'params': {'noplaylist': True}
- }, {
- 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
- 'info_dict': {
- 'id': '14121079',
- },
- 'playlist_count': 1,
- 'params': {'playlist_items': '5'}
- }, {
- 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150',
- 'info_dict': {
- 'id': '14121079',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '15083150',
- 'ext': 'mp4',
- 'description': 'md5:7be1c485425f5f255a5e4e4815e77d04',
- 'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg',
- 'title': 'Umfrage: Welches Tier ist Sebastian Kurz?',
- }
- }],
- 'playlist_count': 1,
- 'params': {'noplaylist': True, 'skip_download': 'm3u8'}
- }, {
- 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
- 'playlist': [{
- 'md5': '2942210346ed779588f428a92db88712',
- 'info_dict': {
- 'id': '8896777',
- 'ext': 'mp4',
- 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
- 'description': 'md5:c1272f0245537812d4e36419c207b67d',
- 'duration': 2668,
- 'upload_date': '20141208',
- },
- }],
- 'skip': 'Blocked outside of Austria / Germany',
- }, {
- 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
- 'info_dict': {
- 'id': '7982259',
- 'ext': 'mp4',
- 'title': 'Best of Ingrid Thurnher',
- 'upload_date': '20140527',
- 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
- },
- 'params': {
- 'skip_download': True, # rtsp downloads
- },
- 'skip': 'Blocked outside of Austria / Germany',
- }, {
- 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
- 'only_matching': True,
- }, {
- 'url': 'http://tvthek.orf.at/profile/Universum/35429',
- 'only_matching': True,
- }]
-
- def _pagefunc(self, url, data_jsb, n, *, image=None):
- sd = data_jsb[n]
- video_id, title = str(sd['id']), sd['title']
- formats = []
- for fd in sd['sources']:
- src = url_or_none(fd.get('src'))
- if not src:
- continue
- format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
- ext = determine_ext(src)
- if ext == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest')
- if any('/geoprotection' in f['url'] for f in m3u8_formats):
- self.raise_geo_restricted()
- formats.extend(m3u8_formats)
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- src, video_id, f4m_id=format_id, fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest'))
- else:
- formats.append({
- 'format_id': format_id,
- 'url': src,
- 'protocol': fd.get('protocol'),
- })
-
- # Check for geoblocking.
- # There is a property is_geoprotection, but that's always false
- geo_str = sd.get('geoprotection_string')
- http_url = next(
- (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])),
- None) if geo_str else None
- if http_url:
- self._request_webpage(
- HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
- errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')
-
- subtitles = {}
- for sub in sd.get('subtitles', []):
- sub_src = sub.get('src')
- if not sub_src:
- continue
- subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
- 'url': sub_src,
- })
-
- upload_date = unified_strdate(sd.get('created_date'))
-
- thumbnails = []
- preview = sd.get('preview_image_url')
- if preview:
- thumbnails.append({
- 'id': 'preview',
- 'url': preview,
- 'preference': 0,
- })
- image = sd.get('image_full_url') or image
- if image:
- thumbnails.append({
- 'id': 'full',
- 'url': image,
- 'preference': 1,
- })
-
- yield {
- 'id': video_id,
- 'title': title,
- 'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}),
- 'formats': formats,
- 'subtitles': subtitles,
- 'description': sd.get('description'),
- 'duration': int_or_none(sd.get('duration_in_seconds')),
- 'upload_date': upload_date,
- 'thumbnails': thumbnails,
- }
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url)
- playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url')
- webpage = self._download_webpage(url, playlist_id)
-
- data_jsb = self._parse_json(
- self._search_regex(
- r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
- webpage, 'playlist', group='json'),
- playlist_id, transform_source=unescapeHTML)['playlist']['videos']
-
- if not self._yes_playlist(playlist_id, video_id, smuggled_data):
- data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id]
-
- playlist_count = len(data_jsb)
- image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None
-
- page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image)
- return {
- '_type': 'playlist',
- 'entries': InAdvancePagedList(page_func, playlist_count, 1),
- 'id': playlist_id,
- }
-
-
class ORFRadioIE(InfoExtractor):
IE_NAME = 'orf:radio'
@@ -569,7 +390,7 @@ class ORFFM4StoryIE(InfoExtractor):
class ORFONIE(InfoExtractor):
IE_NAME = 'orf:on'
- _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
+ _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
'info_dict': {
@@ -582,33 +403,59 @@ class ORFONIE(InfoExtractor):
'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
+ '_old_archive_ids': ['orftvthek 14210000'],
+ }
+ }, {
+ 'url': 'https://on.orf.at/video/3220355',
+ 'md5': 'f94d98e667cf9a3851317efb4e136662',
+ 'info_dict': {
+ 'id': '3220355',
+ 'ext': 'mp4',
+ 'duration': 445.04,
+ 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png',
+ 'title': '50 Jahre Burgenland: Der Festumzug',
+ 'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0',
+ 'media_type': 'episode',
+ 'timestamp': 52916400,
+ 'upload_date': '19710905',
+ '_old_archive_ids': ['orftvthek 3220355'],
}
}]
- def _extract_video(self, video_id, display_id):
+ def _extract_video(self, video_id):
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json(
- f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
+ f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)
+
+ if traverse_obj(api_json, 'is_drm_protected'):
+ self.report_drm(video_id)
formats, subtitles = [], {}
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
if manifest_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
- manifest_url, display_id, fatal=False, m3u8_id='hls')
+ manifest_url, video_id, fatal=False, m3u8_id='hls')
elif manifest_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles(
- manifest_url, display_id, fatal=False, mpd_id='dash')
+ manifest_url, video_id, fatal=False, mpd_id='dash')
else:
continue
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
+ for sub_url in traverse_obj(api_json, (
+ '_embedded', 'subtitle',
+ ('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url'), {url_or_none})):
+ self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles)
+
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
+ '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)],
**traverse_obj(api_json, {
+ 'age_limit': ('age_classification', {parse_age_limit}),
'duration': ('duration_second', {float_or_none}),
'title': (('title', 'headline'), {str}),
'description': (('description', 'teaser_text'), {str}),
@@ -617,14 +464,14 @@ class ORFONIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id, display_id = self._match_valid_url(url).group('id', 'slug')
- webpage = self._download_webpage(url, display_id)
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
return {
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(
['description', 'og:description', 'twitter:description'], webpage, default=None),
- **self._search_json_ld(webpage, display_id, fatal=False),
- **self._extract_video(video_id, display_id),
+ **self._search_json_ld(webpage, video_id, fatal=False),
+ **self._extract_video(video_id),
}
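
Note on the orf.py change above: the ORF ON API's "encrypted" episode id is just the numeric video id prefixed with a static secret and base64-encoded, so the request URL can be reproduced with the stdlib alone:

    import base64

    def encrypted_episode_id(video_id):
        # mirrors _extract_video above: static prefix + video id, base64-encoded
        return base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()

    eid = encrypted_episode_id('14210000')
    print(f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{eid}')
    assert base64.b64decode(eid).decode() == '3dSlfek03nsLKdj4Jsd14210000'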
diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py
index 5620330..3e969c8 100644
--- a/yt_dlp/extractor/packtpub.py
+++ b/yt_dlp/extractor/packtpub.py
@@ -3,13 +3,12 @@ import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
# remove_end,
str_or_none,
strip_or_none,
unified_timestamp,
- # urljoin,
)
diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py
index 63c5fd6..6b25962 100644
--- a/yt_dlp/extractor/panopto.py
+++ b/yt_dlp/extractor/panopto.py
@@ -5,17 +5,13 @@ import json
import random
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlparse,
- compat_urlparse
-)
-
+from ..compat import compat_urllib_parse_urlparse, compat_urlparse
from ..utils import (
- bug_reports_message,
ExtractorError,
+ OnDemandPagedList,
+ bug_reports_message,
get_first,
int_or_none,
- OnDemandPagedList,
parse_qs,
srt_subtitles_timecode,
traverse_obj,
diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py
index 7e472a6..3f19803 100644
--- a/yt_dlp/extractor/paramountplus.py
+++ b/yt_dlp/extractor/paramountplus.py
@@ -1,7 +1,7 @@
import itertools
-from .common import InfoExtractor
from .cbs import CBSBaseIE
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index d4f822f..6c441ff 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -1,8 +1,8 @@
import itertools
+import urllib.parse
from .common import InfoExtractor
from .vimeo import VimeoIE
-from ..compat import compat_urllib_parse_unquote
from ..networking.exceptions import HTTPError
from ..utils import (
KNOWN_EXTENSIONS,
@@ -14,7 +14,6 @@ from ..utils import (
parse_iso8601,
str_or_none,
traverse_obj,
- try_get,
url_or_none,
urljoin,
)
@@ -199,7 +198,50 @@ class PatreonIE(PatreonBaseIE):
'channel_id': '2147162',
'uploader_url': 'https://www.patreon.com/yaboyroshi',
},
+ }, {
+ # NSFW vimeo embed URL
+ 'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
+ 'info_dict': {
+ 'id': '902250943',
+ 'ext': 'mp4',
+ 'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
+ 'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
+ 'uploader': 'Npickyeonhwa',
+ 'uploader_id': '90574422',
+ 'uploader_url': 'https://www.patreon.com/Yeonhwa726',
+ 'channel_id': '10237902',
+ 'channel_url': 'https://www.patreon.com/Yeonhwa726',
+ 'duration': 70,
+ 'timestamp': 1705150153,
+ 'upload_date': '20240113',
+ 'comment_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.+',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # multiple attachments/embeds
+ 'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
+ 'playlist_count': 3,
+ 'info_dict': {
+ 'id': '100601977',
+ 'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
+ 'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
+ 'uploader': 'Bradley Hall',
+ 'uploader_id': '24401883',
+ 'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
+ 'channel_id': '3193932',
+ 'channel_url': 'https://www.patreon.com/bradleyhallguitar',
+ 'channel_follower_count': int,
+ 'timestamp': 1710777855,
+ 'upload_date': '20240318',
+ 'like_count': int,
+ 'comment_count': int,
+ 'thumbnail': r're:^https?://.+',
+ },
+ 'skip': 'Patron-only content',
}]
+ _RETURN_TYPE = 'video'
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -214,95 +256,108 @@ class PatreonIE(PatreonBaseIE):
'include': 'audio,user,user_defined_tags,campaign,attachments_media',
})
attributes = post['data']['attributes']
- title = attributes['title'].strip()
- image = attributes.get('image') or {}
- info = {
- 'id': video_id,
- 'title': title,
- 'description': clean_html(attributes.get('content')),
- 'thumbnail': image.get('large_url') or image.get('url'),
- 'timestamp': parse_iso8601(attributes.get('published_at')),
- 'like_count': int_or_none(attributes.get('like_count')),
- 'comment_count': int_or_none(attributes.get('comment_count')),
- }
- can_view_post = traverse_obj(attributes, 'current_user_can_view')
- if can_view_post and info['comment_count']:
- info['__post_extractor'] = self.extract_comments(video_id)
-
- for i in post.get('included', []):
- i_type = i.get('type')
- if i_type == 'media':
- media_attributes = i.get('attributes') or {}
- download_url = media_attributes.get('download_url')
+ info = traverse_obj(attributes, {
+ 'title': ('title', {str.strip}),
+ 'description': ('content', {clean_html}),
+ 'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
+ 'timestamp': ('published_at', {parse_iso8601}),
+ 'like_count': ('like_count', {int_or_none}),
+ 'comment_count': ('comment_count', {int_or_none}),
+ })
+
+ entries = []
+ idx = 0
+ for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
+ include_type = include['type']
+ if include_type == 'media':
+ media_attributes = traverse_obj(include, ('attributes', {dict})) or {}
+ download_url = url_or_none(media_attributes.get('download_url'))
ext = mimetype2ext(media_attributes.get('mimetype'))
# if size_bytes is None, this media file is likely unavailable
# See: https://github.com/yt-dlp/yt-dlp/issues/4608
size_bytes = int_or_none(media_attributes.get('size_bytes'))
if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
- # XXX: what happens if there are multiple attachments?
- return {
- **info,
+ idx += 1
+ entries.append({
+ 'id': f'{video_id}-{idx}',
'ext': ext,
'filesize': size_bytes,
'url': download_url,
- }
- elif i_type == 'user':
- user_attributes = i.get('attributes')
- if user_attributes:
- info.update({
- 'uploader': user_attributes.get('full_name'),
- 'uploader_id': str_or_none(i.get('id')),
- 'uploader_url': user_attributes.get('url'),
})
- elif i_type == 'post_tag':
- info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
-
- elif i_type == 'campaign':
- info.update({
- 'channel': traverse_obj(i, ('attributes', 'title')),
- 'channel_id': str_or_none(i.get('id')),
- 'channel_url': traverse_obj(i, ('attributes', 'url')),
- 'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
- })
+ elif include_type == 'user':
+ info.update(traverse_obj(include, {
+ 'uploader': ('attributes', 'full_name', {str}),
+ 'uploader_id': ('id', {str_or_none}),
+ 'uploader_url': ('attributes', 'url', {url_or_none}),
+ }))
+
+ elif include_type == 'post_tag':
+ if post_tag := traverse_obj(include, ('attributes', 'value', {str})):
+ info.setdefault('tags', []).append(post_tag)
+
+ elif include_type == 'campaign':
+ info.update(traverse_obj(include, {
+ 'channel': ('attributes', 'title', {str}),
+ 'channel_id': ('id', {str_or_none}),
+ 'channel_url': ('attributes', 'url', {url_or_none}),
+ 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
+ }))
# handle Vimeo embeds
- if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
- embed_html = try_get(attributes, lambda x: x['embed']['html'])
- v_url = url_or_none(compat_urllib_parse_unquote(
- self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
- if v_url:
- v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
- if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
- return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
-
- embed_url = try_get(attributes, lambda x: x['embed']['url'])
+ if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
+ v_url = urllib.parse.unquote(self._html_search_regex(
+ r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
+ traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
+ if url_or_none(v_url) and self._request_webpage(
+ v_url, video_id, 'Checking Vimeo embed URL',
+ headers={'Referer': 'https://patreon.com/'},
+ fatal=False, errnote=False):
+ entries.append(self.url_result(
+ VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+ VimeoIE, url_transparent=True))
+
+ embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
- return self.url_result(embed_url, **info)
+ entries.append(self.url_result(embed_url))
- post_file = traverse_obj(attributes, 'post_file')
+ post_file = traverse_obj(attributes, ('post_file', {dict}))
if post_file:
name = post_file.get('name')
ext = determine_ext(name)
if ext in KNOWN_EXTENSIONS:
- return {
- **info,
+ entries.append({
+ 'id': video_id,
'ext': ext,
'url': post_file['url'],
- }
+ })
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
- return {
- **info,
+ entries.append({
+ 'id': video_id,
'formats': formats,
'subtitles': subtitles,
- }
+ })
+
+ can_view_post = traverse_obj(attributes, 'current_user_can_view')
+ comments = None
+ if can_view_post and info.get('comment_count'):
+ comments = self.extract_comments(video_id)
- if can_view_post is False:
+ if not entries and can_view_post is False:
self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
- else:
+ elif not entries:
self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
+ elif len(entries) == 1:
+ info.update(entries[0])
+ else:
+ for entry in entries:
+ entry.update(info)
+ return self.playlist_result(entries, video_id, **info, __post_extractor=comments)
+
+ info['id'] = video_id
+ info['__post_extractor'] = comments
return info
def _get_comments(self, post_id):
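
Note on the patreon.py refactor above: every playable attachment, embed, and post_file is first collected into entries, and only then is the return shape decided — none found raises, exactly one is merged into the post-level metadata, several become a playlist whose entries each inherit that metadata. A compressed sketch of that three-way tail (names illustrative; playlist_result is approximated with a plain dict):

    def finalize(video_id, info, entries, comments=None):
        if not entries:
            raise RuntimeError('No supported media found in this post')
        if len(entries) == 1:          # single attachment: flat info dict
            info.update(entries[0])
            info['id'] = video_id
            info['__post_extractor'] = comments
            return info
        for entry in entries:          # several: each inherits the post metadata
            entry.update(info)
        return {'_type': 'playlist', 'id': video_id, 'entries': entries, **info}

    single = finalize('96414599', {'title': 'post'}, [{'id': '96414599-1', 'url': 'https://host/a.mp4'}])
    multi = finalize('100601977', {'title': 'post'}, [{'id': 'a'}, {'id': 'b'}])
    assert single['url'] and multi['_type'] == 'playlist'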
diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py
index 2bb2ea9..f6f5a5c 100644
--- a/yt_dlp/extractor/pbs.py
+++ b/yt_dlp/extractor/pbs.py
@@ -3,10 +3,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ US_RATINGS,
ExtractorError,
determine_ext,
- int_or_none,
float_or_none,
+ int_or_none,
js_to_json,
orderedSet,
strip_jsonp,
@@ -14,7 +15,6 @@ from ..utils import (
traverse_obj,
unified_strdate,
url_or_none,
- US_RATINGS,
)
diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py
index e27e5a7..086eaaf 100644
--- a/yt_dlp/extractor/pearvideo.py
+++ b/yt_dlp/extractor/pearvideo.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..utils import (
qualities,
- unified_timestamp,
traverse_obj,
+ unified_timestamp,
)
diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py
index 730b239..b7919c0 100644
--- a/yt_dlp/extractor/peertube.py
+++ b/yt_dlp/extractor/peertube.py
@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ OnDemandPagedList,
format_field,
int_or_none,
parse_resolution,
@@ -12,7 +13,6 @@ from ..utils import (
unified_timestamp,
url_or_none,
urljoin,
- OnDemandPagedList,
)
diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py
index 3ae985d..87d912d 100644
--- a/yt_dlp/extractor/piapro.py
+++ b/yt_dlp/extractor/piapro.py
@@ -2,6 +2,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
+ clean_html,
+ get_element_by_class,
parse_duration,
parse_filesize,
str_to_int,
@@ -88,34 +90,22 @@ class PiaproIE(InfoExtractor):
if category_id not in ('1', '2', '21', '22', '23', '24', '25'):
raise ExtractorError('The URL does not contain audio.', expected=True)
- str_duration, str_filesize = self._search_regex(
- r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B)\)', webpage, 'duration and size',
- group=(1, 2), default=(None, None))
- str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False)
-
- uploader_id, uploader = self._search_regex(
- r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 'uploader',
- group=(1, 2), default=(None, None))
- content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID')
- create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp')
-
- player_webpage = self._download_webpage(
- f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}',
- video_id, note='Downloading player webpage')
+ def extract_info(name, description):
+ return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*</p>', webpage, description, default=None)
return {
'id': video_id,
- 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False),
- 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'timestamp': unified_timestamp(create_date, False),
- 'duration': parse_duration(str_duration),
- 'view_count': str_to_int(str_viewcount),
+ 'title': clean_html(get_element_by_class('contents_title', webpage)),
+ 'description': clean_html(get_element_by_class('contents_description', webpage)),
+ 'uploader': clean_html(get_element_by_class('contents_creator_txt', webpage)),
+ 'uploader_id': self._search_regex(
+ r'<a\s+href="/([^"]+)"', get_element_by_class('contents_creator', webpage), 'uploader id', default=None),
+ 'timestamp': unified_timestamp(extract_info('投稿日', 'timestamp'), False),
+ 'duration': parse_duration(extract_info('長さ', 'duration')),
+ 'view_count': str_to_int(extract_info('閲覧数', 'view count')),
'thumbnail': self._html_search_meta('twitter:image', webpage),
-
- 'filesize_approx': parse_filesize(str_filesize.replace(',', '')),
- 'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 'url'),
+ 'filesize_approx': parse_filesize((extract_info('サイズ', 'size') or '').replace(',', '')),
+ 'url': self._search_regex(r'\"url\":\s*\"(.*?)\"', webpage, 'url'),
'ext': 'mp3',
'vcodec': 'none',
}
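
The piapro rewrite above drops the second request to the html5_player_popup page and replaces position-sensitive regexes with class-based lookups plus one label-driven helper. A minimal standalone sketch of that pattern follows; the HTML snippet and field labels are illustrative, while clean_html and get_element_by_class are the real yt_dlp.utils helpers used in the diff:

    import re

    from yt_dlp.utils import clean_html, get_element_by_class

    webpage = '''
    <h1 class="contents_title">Example Track</h1>
    <p>長さ: 3:41</p>
    <p>閲覧数: 1,234</p>
    '''

    def extract_info(name, webpage):
        # One regex template serves every "label: value" row on the page
        m = re.search(rf'{name}[::]\s*([\d\s,:/]+)\s*</p>', webpage)
        return m.group(1) if m else None

    print(clean_html(get_element_by_class('contents_title', webpage)))  # Example Track
    print(extract_info('長さ', webpage))    # 3:41
    print(extract_info('閲覧数', webpage))  # 1,234
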
diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py
index 97a9bf5..8870d7b 100644
--- a/yt_dlp/extractor/piksel.py
+++ b/yt_dlp/extractor/piksel.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
int_or_none,
join_nonempty,
parse_iso8601,
diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py
index d67f600..c72a387 100644
--- a/yt_dlp/extractor/pladform.py
+++ b/yt_dlp/extractor/pladform.py
@@ -1,11 +1,11 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_qs,
- xpath_text,
qualities,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py
index 166b98c..d978c08 100644
--- a/yt_dlp/extractor/platzi.py
+++ b/yt_dlp/extractor/platzi.py
@@ -4,8 +4,8 @@ from ..compat import (
compat_str,
)
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py
index c418f88..a01b422 100644
--- a/yt_dlp/extractor/playtvak.py
+++ b/yt_dlp/extractor/playtvak.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_urllib_parse_urlencode,
+ compat_urlparse,
)
from ..utils import (
ExtractorError,
diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py
index 809b656..60c9eff 100644
--- a/yt_dlp/extractor/pluralsight.py
+++ b/yt_dlp/extractor/pluralsight.py
@@ -10,8 +10,8 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
float_or_none,
int_or_none,
parse_duration,
diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py
index 1cebb36..ecf2132 100644
--- a/yt_dlp/extractor/polsatgo.py
+++ b/yt_dlp/extractor/polsatgo.py
@@ -3,10 +3,10 @@ import uuid
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
int_or_none,
try_get,
url_or_none,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py
deleted file mode 100644
index 7d16a16..0000000
--- a/yt_dlp/extractor/porn91.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import urllib.parse
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- parse_duration,
- remove_end,
- unified_strdate,
- ExtractorError,
-)
-
-
-class Porn91IE(InfoExtractor):
- IE_NAME = '91porn'
- _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
-
- _TESTS = [{
- 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
- 'md5': 'd869db281402e0ef4ddef3c38b866f86',
- 'info_dict': {
- 'id': '7e42283b4f5ab36da134',
- 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
- 'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
- 'ext': 'mp4',
- 'duration': 431,
- 'upload_date': '20150520',
- 'comment_count': int,
- 'view_count': int,
- 'age_limit': 18,
- }
- }, {
- 'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
- 'md5': 'f8fd50540468a6d795378cd778b40226',
- 'info_dict': {
- 'id': '7ef0cf3d362c699ab91c',
- 'title': '真实空乘,冲上云霄第二部',
- 'description': 'md5:618bf9652cafcc66cd277bd96789baea',
- 'ext': 'mp4',
- 'duration': 248,
- 'upload_date': '20221119',
- 'comment_count': int,
- 'view_count': int,
- 'age_limit': 18,
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- self._set_cookie('91porn.com', 'language', 'cn_CN')
-
- webpage = self._download_webpage(
- 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
-
- if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
- raise ExtractorError('91 Porn says: Video does not exist', expected=True)
-
- daily_limit = self._search_regex(
- r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
- if daily_limit:
- raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
-
- video_link_url = self._search_regex(
- r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
- video_link_url = self._search_regex(
- r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
-
- formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
-
- return {
- 'id': video_id,
- 'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
- 'formats': formats,
- 'subtitles': subtitles,
- 'upload_date': unified_strdate(self._search_regex(
- r'<span\s+class=["\']title-yakov["\']>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload_date', fatal=False)),
- 'description': self._html_search_regex(
- r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
- 'duration': parse_duration(self._search_regex(
- r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
- 'comment_count': int_or_none(self._search_regex(
- r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
- 'view_count': int_or_none(self._search_regex(
- r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
- 'age_limit': 18,
- }
-
- def _get_formats_and_subtitle(self, video_link_url, video_id):
- ext = determine_ext(video_link_url)
- if ext == 'm3u8':
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
- else:
- formats = [{'url': video_link_url, 'ext': ext}]
- subtitles = {}
-
- return formats, subtitles
diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py
index 51a9cf3..d711d3e 100644
--- a/yt_dlp/extractor/pornflip.py
+++ b/yt_dlp/extractor/pornflip.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_iso8601
-)
+from ..utils import int_or_none, parse_duration, parse_iso8601
class PornFlipIE(InfoExtractor):
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 29a3e43..d94f28c 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -97,7 +97,7 @@ class PornHubBaseIE(InfoExtractor):
login_form = self._hidden_inputs(login_page)
login_form.update({
- 'username': username,
+ 'email': username,
'password': password,
})
diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py
index 2e51b4f..b8e8701 100644
--- a/yt_dlp/extractor/pornovoisines.py
+++ b/yt_dlp/extractor/pornovoisines.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
unified_strdate,
)
diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py
index 5bb1832..338794e 100644
--- a/yt_dlp/extractor/prx.py
+++ b/yt_dlp/extractor/prx.py
@@ -1,14 +1,15 @@
import itertools
+
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
- urljoin,
- traverse_obj,
+ clean_html,
int_or_none,
mimetype2ext,
- clean_html,
- url_or_none,
- unified_timestamp,
str_or_none,
+ traverse_obj,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py
index 4b8e5e9..fc4c29e 100644
--- a/yt_dlp/extractor/puhutv.py
+++ b/yt_dlp/extractor/puhutv.py
@@ -3,8 +3,8 @@ from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
+ int_or_none,
parse_resolution,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py
index aa690d4..cb00de2 100644
--- a/yt_dlp/extractor/qingting.py
+++ b/yt_dlp/extractor/qingting.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import traverse_obj
diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py
index 9285825..90141e6 100644
--- a/yt_dlp/extractor/qqmusic.py
+++ b/yt_dlp/extractor/qqmusic.py
@@ -4,8 +4,8 @@ import time
from .common import InfoExtractor
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
strip_jsonp,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py
index 1a5a635..4a09dcd 100644
--- a/yt_dlp/extractor/radiocanada.py
+++ b/yt_dlp/extractor/radiocanada.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
unified_strdate,
)
diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py
index 38f8cf7..0c21977 100644
--- a/yt_dlp/extractor/radiocomercial.py
+++ b/yt_dlp/extractor/radiocomercial.py
@@ -14,7 +14,7 @@ from ..utils import (
try_call,
unified_strdate,
update_url,
- urljoin
+ urljoin,
)
from ..utils.traversal import traverse_obj
diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py
index 6752017..632c8c2 100644
--- a/yt_dlp/extractor/radiozet.py
+++ b/yt_dlp/extractor/radiozet.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- traverse_obj,
strip_or_none,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
index 3c00183..325e278 100644
--- a/yt_dlp/extractor/radlive.py
+++ b/yt_dlp/extractor/radlive.py
@@ -1,13 +1,13 @@
import json
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
format_field,
traverse_obj,
try_get,
- unified_timestamp
+ unified_timestamp,
)
-from .common import InfoExtractor
class RadLiveIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index c1fc65c..c2e7a6f 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -3,11 +3,11 @@ import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
+ ExtractorError,
+ GeoRestrictedError,
clean_html,
determine_ext,
- ExtractorError,
filter_dict,
- GeoRestrictedError,
int_or_none,
join_nonempty,
parse_duration,
diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py
index 54f194c..5f2d0c1 100644
--- a/yt_dlp/extractor/rbgtum.py
+++ b/yt_dlp/extractor/rbgtum.py
@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
-from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError
+from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj
class RbgTumIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
index 6a7c7f3..9c382e2 100644
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -5,11 +5,11 @@ import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
strip_or_none,
traverse_obj,
- try_get
+ try_get,
)
diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py
index 1a1c663..cc76b89 100644
--- a/yt_dlp/extractor/rds.py
+++ b/yt_dlp/extractor/rds.py
@@ -1,10 +1,10 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ js_to_json,
parse_duration,
parse_iso8601,
- js_to_json,
)
-from ..compat import compat_str
class RDSIE(InfoExtractor):
diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py
index d1de249..fac51b9 100644
--- a/yt_dlp/extractor/redbulltv.py
+++ b/yt_dlp/extractor/redbulltv.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- float_or_none,
ExtractorError,
+ float_or_none,
)
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 62f669f..bc3e5f7 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -5,11 +5,13 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ parse_qs,
traverse_obj,
try_get,
unescapeHTML,
- urlencode_postdata,
+ update_url_query,
url_or_none,
+ urlencode_postdata,
)
@@ -76,7 +78,7 @@ class RedditIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'comment_count': int,
- 'age_limit': 0,
+ 'age_limit': 18,
'channel_id': 'u_creepyt0es',
},
'params': {
@@ -151,6 +153,51 @@ class RedditIE(InfoExtractor):
},
'skip': 'Requires account that has opted-in to the GenZedong subreddit',
}, {
+ # subtitles in HLS manifest
+ 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/',
+ 'info_dict': {
+ 'id': 'a2mdj5d57qyc1',
+ 'ext': 'mp4',
+ 'display_id': '1cl9h0u',
+ 'title': 'The insurance claim will be interesting',
+ 'uploader': 'darrenpauli',
+ 'channel_id': 'Unexpected',
+ 'duration': 53,
+ 'upload_date': '20240506',
+ 'timestamp': 1714966382,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # subtitles from caption-url
+ 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/',
+ 'info_dict': {
+ 'id': 'xbmj4t3igy1d1',
+ 'ext': 'mp4',
+ 'display_id': '1cxwzso',
+ 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'',
+ 'uploader': 'Woodstovia',
+ 'channel_id': 'soccer',
+ 'duration': 30,
+ 'upload_date': '20240522',
+ 'timestamp': 1716373798,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ 'writesubtitles': True,
+ },
+ }, {
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
'only_matching': True,
}, {
@@ -197,6 +244,12 @@ class RedditIE(InfoExtractor):
elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
raise ExtractorError('Unable to login, no cookie was returned')
+ def _get_subtitles(self, video_id):
+ # Fallback if there were no subtitles provided by DASH or HLS manifests
+ caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt'
+ if self._is_valid_url(caption_url, video_id, item='subtitles'):
+ return {'en': [{'url': caption_url}]}
+
def _real_extract(self, url):
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
@@ -307,6 +360,10 @@ class RedditIE(InfoExtractor):
dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
+ qs = traverse_obj(parse_qs(hls_playlist_url), {
+ 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}),
+ })
+ hls_playlist_url = update_url_query(hls_playlist_url, qs)
formats = [{
'url': unescapeHTML(reddit_video['fallback_url']),
@@ -332,7 +389,7 @@ class RedditIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles,
+ 'subtitles': subtitles or self.extract_subtitles(video_id),
'duration': int_or_none(reddit_video.get('duration')),
}
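
Two related changes land in reddit.py above: the HLS playlist URL gains a 'subsAll' flag in its f query parameter so subtitle renditions appear in the manifest, and _get_subtitles probes a fixed wh_ben_en.vtt caption URL (via the inherited _is_valid_url check) as a last resort. A minimal sketch of the query rewrite, using the same yt_dlp.utils helpers as the diff; the example URL is made up:

    from yt_dlp.utils import parse_qs, traverse_obj, update_url_query

    hls_playlist_url = 'https://v.redd.it/abc123/HLSPlaylist.m3u8?f=hd'

    # Append 'subsAll' to an existing f= value, or supply 'hd,subsAll' outright
    qs = traverse_obj(parse_qs(hls_playlist_url), {
        'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}),
    })
    print(update_url_query(hls_playlist_url, qs))
    # https://v.redd.it/abc123/HLSPlaylist.m3u8?f=hd%2CsubsAll (comma percent-encoded)
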
diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py
index f945320..d0546bb 100644
--- a/yt_dlp/extractor/redgifs.py
+++ b/yt_dlp/extractor/redgifs.py
@@ -5,10 +5,10 @@ from ..compat import compat_parse_qs
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
+ OnDemandPagedList,
int_or_none,
qualities,
try_get,
- OnDemandPagedList,
)
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 965abbe..14ed0ed 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
merge_dicts,
str_to_int,
diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py
index 0a8f13b..9c9bac6 100644
--- a/yt_dlp/extractor/reuters.py
+++ b/yt_dlp/extractor/reuters.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- js_to_json,
int_or_none,
+ js_to_json,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py
index 8d29b30..bc59ed0 100644
--- a/yt_dlp/extractor/rmcdecouverte.py
+++ b/yt_dlp/extractor/rmcdecouverte.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
+from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py
index 7ba80d4..729804d 100644
--- a/yt_dlp/extractor/rte.py
+++ b/yt_dlp/extractor/rte.py
@@ -3,13 +3,13 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
float_or_none,
parse_iso8601,
str_or_none,
try_get,
unescapeHTML,
url_or_none,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py
index 5928a20..ec78d0a 100644
--- a/yt_dlp/extractor/rtp.py
+++ b/yt_dlp/extractor/rtp.py
@@ -1,9 +1,10 @@
-from .common import InfoExtractor
-from ..utils import js_to_json
-import re
+import base64
import json
+import re
import urllib.parse
-import base64
+
+from .common import InfoExtractor
+from ..utils import js_to_json
class RTPIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py
index 741c472..e7dcd5f 100644
--- a/yt_dlp/extractor/rtvcplay.py
+++ b/yt_dlp/extractor/rtvcplay.py
@@ -1,16 +1,17 @@
import re
-from .common import InfoExtractor, ExtractorError
+from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- int_or_none,
float_or_none,
+ int_or_none,
js_to_json,
mimetype2ext,
traverse_obj,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index a84a78d..defb8d7 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
parse_duration,
traverse_obj,
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index 287824d..eb12f32 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -5,8 +5,8 @@ from ..compat import (
compat_str,
)
from ..utils import (
- determine_ext,
bool_or_none,
+ determine_ext,
int_or_none,
parse_qs,
try_get,
diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py
index d7f9a73..726d491 100644
--- a/yt_dlp/extractor/rutv.py
+++ b/yt_dlp/extractor/rutv.py
@@ -1,11 +1,7 @@
import re
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- str_to_int
-)
+from ..utils import ExtractorError, int_or_none, str_to_int
class RUTVIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
index 33f6652..dc61387 100644
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@@ -4,8 +4,8 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
find_xpath_attr,
int_or_none,
traverse_obj,
diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py
index 8d322d7..17dff0a 100644
--- a/yt_dlp/extractor/safari.py
+++ b/yt_dlp/extractor/safari.py
@@ -2,7 +2,6 @@ import json
import re
from .common import InfoExtractor
-
from ..compat import (
compat_parse_qs,
compat_urlparse,
diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py
index 3912f77..85d51cd 100644
--- a/yt_dlp/extractor/scrippsnetworks.py
+++ b/yt_dlp/extractor/scrippsnetworks.py
@@ -1,8 +1,8 @@
-import json
import hashlib
+import json
-from .aws import AWSIE
from .anvato import AnvatoIE
+from .aws import AWSIE
from .common import InfoExtractor
from ..utils import (
smuggle_url,
diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py
index 9c2ca8c..fc91d60 100644
--- a/yt_dlp/extractor/scte.py
+++ b/yt_dlp/extractor/scte.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- decode_packed_codes,
ExtractorError,
+ decode_packed_codes,
urlencode_postdata,
)
diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py
index 1ecea71..99fcf51 100644
--- a/yt_dlp/extractor/sendtonews.py
+++ b/yt_dlp/extractor/sendtonews.py
@@ -2,12 +2,12 @@ import re
from .common import InfoExtractor
from ..utils import (
+ determine_protocol,
float_or_none,
- parse_iso8601,
- update_url_query,
int_or_none,
- determine_protocol,
+ parse_iso8601,
unescapeHTML,
+ update_url_query,
)
diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py
index 79e8885..b31d566 100644
--- a/yt_dlp/extractor/seznamzpravy.py
+++ b/yt_dlp/extractor/seznamzpravy.py
@@ -4,11 +4,11 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
- urljoin,
int_or_none,
parse_codecs,
parse_qs,
try_get,
+ urljoin,
)
diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py
index d509e88..89aee27 100644
--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@@ -5,9 +5,9 @@ import re
from .aws import AWSIE
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
InAdvancePagedList,
+ clean_html,
int_or_none,
parse_iso8601,
str_or_none,
diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
index ec9938b..cca86ed 100644
--- a/yt_dlp/extractor/shemaroome.py
+++ b/yt_dlp/extractor/shemaroome.py
@@ -4,8 +4,8 @@ from ..compat import (
compat_b64decode,
)
from ..utils import (
- bytes_to_intlist,
ExtractorError,
+ bytes_to_intlist,
intlist_to_bytes,
unified_strdate,
)
diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py
index ef93b92..44619a1 100644
--- a/yt_dlp/extractor/sixplay.py
+++ b/yt_dlp/extractor/sixplay.py
@@ -6,8 +6,8 @@ from ..utils import (
determine_ext,
int_or_none,
parse_qs,
- try_get,
qualities,
+ try_get,
)
diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py
index 8677827..234703c 100644
--- a/yt_dlp/extractor/skynewsarabia.py
+++ b/yt_dlp/extractor/skynewsarabia.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- parse_iso8601,
parse_duration,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py
index c0ff4f9..a41ad30 100644
--- a/yt_dlp/extractor/sohu.py
+++ b/yt_dlp/extractor/sohu.py
@@ -8,13 +8,13 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
- url_or_none,
- unified_timestamp,
+ int_or_none,
+ traverse_obj,
try_get,
+ unified_timestamp,
+ url_or_none,
urljoin,
- traverse_obj,
)
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index c9ed645..3581461 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -1,3 +1,4 @@
+import functools
import itertools
import json
import re
@@ -12,6 +13,7 @@ from ..utils import (
error_to_compat_str,
float_or_none,
int_or_none,
+ join_nonempty,
mimetype2ext,
parse_qs,
str_or_none,
@@ -68,6 +70,16 @@ class SoundcloudBaseIE(InfoExtractor):
'original': 0,
}
+ _DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3']
+
+ @functools.cached_property
+ def _is_requested(self):
+ return re.compile(r'|'.join(set(
+ re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default'
+ else '|'.join(map(re.escape, self._DEFAULT_FORMATS))
+ for pattern in self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE)
+ ))).fullmatch
+
def _store_client_id(self, client_id):
self.cache.store('soundcloud', 'client_id', client_id)
@@ -216,7 +228,7 @@ class SoundcloudBaseIE(InfoExtractor):
redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
if redirect_url:
urlh = self._request_webpage(
- HEADRequest(redirect_url), track_id, fatal=False)
+ HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False)
if urlh:
format_url = urlh.url
format_urls.add(format_url)
@@ -258,7 +270,7 @@ class SoundcloudBaseIE(InfoExtractor):
abr = f.get('abr')
if abr:
f['abr'] = int(abr)
- if protocol == 'hls':
+ if protocol in ('hls', 'hls-aes'):
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
else:
protocol = 'http'
@@ -274,11 +286,32 @@ class SoundcloudBaseIE(InfoExtractor):
if extract_flat:
break
format_url = t['url']
- stream = None
+ protocol = traverse_obj(t, ('format', 'protocol', {str}))
+ if protocol == 'progressive':
+ protocol = 'http'
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url:
+ protocol = 'hls-aes'
+
+ ext = None
+ if preset := traverse_obj(t, ('preset', {str_or_none})):
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str})))
+
+ identifier = join_nonempty(protocol, ext, delim='_')
+ if not self._is_requested(identifier):
+ self.write_debug(f'"{identifier}" is not a requested format, skipping')
+ continue
+
+ stream = None
for retry in self.RetryManager(fatal=False):
try:
- stream = self._download_json(format_url, track_id, query=query, headers=self._HEADERS)
+ stream = self._download_json(
+ format_url, track_id, f'Downloading {identifier} format info JSON',
+ query=query, headers=self._HEADERS)
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 429:
self.report_warning(
@@ -289,27 +322,14 @@ class SoundcloudBaseIE(InfoExtractor):
else:
self.report_warning(e.msg)
- if not isinstance(stream, dict):
- continue
- stream_url = url_or_none(stream.get('url'))
+ stream_url = traverse_obj(stream, ('url', {url_or_none}))
if invalid_url(stream_url):
continue
format_urls.add(stream_url)
- stream_format = t.get('format') or {}
- protocol = stream_format.get('protocol')
- if protocol != 'hls' and '/hls' in format_url:
- protocol = 'hls'
- ext = None
- preset = str_or_none(t.get('preset'))
- if preset:
- ext = preset.split('_')[0]
- if ext not in KNOWN_EXTENSIONS:
- ext = mimetype2ext(stream_format.get('mime_type'))
add_format({
'url': stream_url,
'ext': ext,
- }, 'http' if protocol == 'progressive' else protocol,
- t.get('snipped') or '/preview/' in format_url)
+ }, protocol, t.get('snipped') or '/preview/' in format_url)
for f in formats:
f['vcodec'] = 'none'
@@ -361,7 +381,7 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
- 'genre': info.get('genre'),
+ 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
'formats': formats if not extract_flat else None
}
@@ -395,10 +415,10 @@ class SoundcloudIE(SoundcloudBaseIE):
_TESTS = [
{
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
- 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+ 'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
'info_dict': {
'id': '62986583',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music',
@@ -411,6 +431,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
+ 'uploader_url': 'https://soundcloud.com/ethmusic',
+ 'genres': [],
}
},
# geo-restricted
@@ -418,7 +441,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
'id': '47127627',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
@@ -431,6 +454,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/the-concept-band',
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
+ 'genres': ['Alternative'],
},
},
# private link
@@ -452,6 +478,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/jaimemf',
+ 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+ 'genres': ['youtubedl'],
},
},
# private link (alt format)
@@ -473,6 +502,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/jaimemf',
+ 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
+ 'genres': ['youtubedl'],
},
},
# downloadable song
@@ -482,6 +514,21 @@ class SoundcloudIE(SoundcloudBaseIE):
'info_dict': {
'id': '343609555',
'ext': 'wav',
+ 'title': 'The Following',
+ 'description': '',
+ 'uploader': '80M',
+ 'uploader_id': '312384765',
+ 'uploader_url': 'https://soundcloud.com/the80m',
+ 'upload_date': '20170922',
+ 'timestamp': 1506120436,
+ 'duration': 397.228,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
+ 'license': 'all-rights-reserved',
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ 'view_count': int,
+ 'genres': ['Dance & EDM'],
},
},
# private link, downloadable format
@@ -503,6 +550,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
+ 'uploader_url': 'https://soundcloud.com/oriuplift',
+ 'genres': ['Trance'],
},
},
# no album art, use avatar pic for thumbnail
@@ -525,6 +575,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'uploader_url': 'https://soundcloud.com/garyvee',
+ 'genres': [],
},
'params': {
'skip_download': True,
@@ -532,13 +584,13 @@ class SoundcloudIE(SoundcloudBaseIE):
},
{
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
- 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'md5': '8227c3473a4264df6b02ad7e5b7527ac',
'info_dict': {
'id': '583011102',
- 'ext': 'mp3',
+ 'ext': 'opus',
'title': 'Mezzo Valzer',
- 'description': 'md5:4138d582f81866a530317bae316e8b61',
- 'uploader': 'Micronie',
+ 'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
+ 'uploader': 'Giovanni Sarani',
'uploader_id': '3352531',
'timestamp': 1551394171,
'upload_date': '20190228',
@@ -549,6 +601,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
+ 'genres': ['Piano'],
+ 'uploader_url': 'https://soundcloud.com/giovannisarani',
},
},
{
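
The SoundCloud changes above introduce per-format filtering: each transcoding is labelled protocol_ext (e.g. hls_opus, hls-aes_mp3) and skipped unless it matches the compiled "formats" extractor-arg, where '*' is a wildcard and 'default' expands to _DEFAULT_FORMATS. A standalone sketch of that pattern compilation, with illustrative inputs:

    import re

    DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3']

    def build_matcher(patterns):
        # Escape each pattern, then re-open '*' as '.*'; 'default' becomes an alternation
        return re.compile('|'.join(set(
            re.escape(p).replace(r'\*', '.*') if p != 'default'
            else '|'.join(map(re.escape, DEFAULT_FORMATS))
            for p in patterns
        ))).fullmatch

    is_requested = build_matcher(['hls_*'])
    print(bool(is_requested('hls_opus')))     # True
    print(bool(is_requested('http_mp3')))     # False
    print(bool(is_requested('hls-aes_mp3')))  # False: encrypted HLS needs 'hls-aes_*' or '*'
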
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 493eea2..773ddd3 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- try_get,
- unified_timestamp
-)
+from ..utils import try_get, unified_timestamp
class SovietsClosetBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
index 43da34a..c73f797 100644
--- a/yt_dlp/extractor/spankbang.py
+++ b/yt_dlp/extractor/spankbang.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
merge_dicts,
parse_duration,
parse_resolution,
diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py
index a98584a..bdb8ef4 100644
--- a/yt_dlp/extractor/springboardplatform.py
+++ b/yt_dlp/extractor/springboardplatform.py
@@ -4,11 +4,11 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
- xpath_attr,
- xpath_text,
- xpath_element,
unescapeHTML,
unified_timestamp,
+ xpath_attr,
+ xpath_element,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/stacommu.py b/yt_dlp/extractor/stacommu.py
index 1308c59..d2f207f 100644
--- a/yt_dlp/extractor/stacommu.py
+++ b/yt_dlp/extractor/stacommu.py
@@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
- _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
IE_NAME = 'theatercomplextown:vod'
_TESTS = [{
'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
@@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
'only_matching': True,
+ }, {
+ 'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+ 'only_matching': True,
}]
_API_PATH = 'videoEpisodes'
@@ -204,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
- _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
+ _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
IE_NAME = 'theatercomplextown:ppv'
_TESTS = [{
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
@@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
'only_matching': True,
+ }, {
+ 'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
+ 'only_matching': True,
}]
_API_PATH = 'events'
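
The _VALID_URL changes above widen the optional language prefix from en-only to en or ja. A quick illustration of the nested optional group:

    import re

    pattern = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
    for url in ('https://www.theater-complex.town/ppv/abc123',
                'https://www.theater-complex.town/ja/ppv/abc123',
                'https://www.theater-complex.town/fr/ppv/abc123'):
        m = re.match(pattern, url)
        print(url, '->', m.group('id') if m else 'no match')
    # Only the bare, en/ and ja/ forms match; fr/ is rejected
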
diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py
index bb6e8f1..312a4fd 100644
--- a/yt_dlp/extractor/startv.py
+++ b/yt_dlp/extractor/startv.py
@@ -3,10 +3,10 @@ from ..compat import (
compat_str,
)
from ..utils import (
- clean_html,
ExtractorError,
- traverse_obj,
+ clean_html,
int_or_none,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py
index 2fd200f..46a15e6 100644
--- a/yt_dlp/extractor/stitcher.py
+++ b/yt_dlp/extractor/stitcher.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
clean_html,
clean_podcast_url,
- ExtractorError,
int_or_none,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
index 566f777..20a70a7 100644
--- a/yt_dlp/extractor/storyfire.py
+++ b/yt_dlp/extractor/storyfire.py
@@ -2,9 +2,9 @@ import functools
from .common import InfoExtractor
from ..utils import (
+ OnDemandPagedList,
format_field,
int_or_none,
- OnDemandPagedList,
smuggle_url,
)
diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py
index 462861e..c303ac5 100644
--- a/yt_dlp/extractor/streamable.py
+++ b/yt_dlp/extractor/streamable.py
@@ -3,8 +3,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
- try_get,
parse_codecs,
+ try_get,
)
diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py
index b9523c8..a847925 100644
--- a/yt_dlp/extractor/stripchat.py
+++ b/yt_dlp/extractor/stripchat.py
@@ -3,7 +3,7 @@ from ..utils import (
ExtractorError,
UserNotLive,
lowercase_escape,
- traverse_obj
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
index 8b3e635..0ab7801 100644
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@@ -41,7 +41,7 @@ class STVPlayerIE(InfoExtractor):
ptype, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id, fatal=False) or ''
- props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
+ props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
player_api_cache = try_get(
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py
index 708873a..501156e 100644
--- a/yt_dlp/extractor/sunporno.py
+++ b/yt_dlp/extractor/sunporno.py
@@ -2,10 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- parse_duration,
+ determine_ext,
int_or_none,
+ parse_duration,
qualities,
- determine_ext,
)
diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py
index bd2d738..29e5e57 100644
--- a/yt_dlp/extractor/syfy.py
+++ b/yt_dlp/extractor/syfy.py
@@ -1,7 +1,7 @@
from .adobepass import AdobePassIE
from ..utils import (
- update_url_query,
smuggle_url,
+ update_url_query,
)
diff --git a/yt_dlp/extractor/taptap.py b/yt_dlp/extractor/taptap.py
new file mode 100644
index 0000000..56f2f0e
--- /dev/null
+++ b/yt_dlp/extractor/taptap.py
@@ -0,0 +1,275 @@
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ join_nonempty,
+ str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class TapTapBaseIE(InfoExtractor):
+ _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC'
+ _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get'
+ _INFO_API = None
+ _INFO_QUERY_KEY = 'id'
+ _DATA_PATH = None
+ _ID_PATH = None
+ _META_PATH = None
+
+ def _get_api(self, url, video_id, query, **kwargs):
+ query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())}
+ return self._download_json(url, video_id, query=query, **kwargs)['data']
+
+ def _extract_video(self, video_id):
+ video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0]
+
+ # h265 playlist contains both h265 and h264 formats
+ video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any))
+ formats = self._extract_m3u8_formats(video_url, video_id, fatal=False)
+ for format in formats:
+ if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')):
+ format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_')
+
+ return {
+ 'id': str(video_id),
+ 'formats': formats,
+ **traverse_obj(video_data, ({
+ 'duration': ('info', 'duration', {int_or_none}),
+ 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}),
+ }), get_all=False)
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = {self._INFO_QUERY_KEY: video_id}
+
+ data = traverse_obj(
+ self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH)
+
+ metainfo = traverse_obj(data, self._META_PATH)
+ entries = [{
+ **metainfo,
+ **self._extract_video(id)
+ } for id in set(traverse_obj(data, self._ID_PATH))]
+
+ return self.playlist_result(entries, **metainfo, id=video_id)
+
+
+class TapTapMomentIE(TapTapBaseIE):
+ _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail'
+ _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id')
+ _META_PATH = ('moment', {
+ 'timestamp': ('created_time', {int_or_none}),
+ 'modified_timestamp': ('edited_time', {int_or_none}),
+ 'uploader': ('author', 'user', 'name', {str}),
+ 'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}),
+ 'title': ('topic', 'title', {str}),
+ 'description': ('topic', 'summary', {str}),
+ })
+ _TESTS = [{
+ 'url': 'https://www.taptap.cn/moment/194618230982052443',
+ 'info_dict': {
+ 'id': '194618230982052443',
+ 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
+ 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
+ 'timestamp': 1633453402,
+ 'upload_date': '20211005',
+ 'modified_timestamp': 1633453402,
+ 'modified_date': '20211005',
+ 'uploader': '乌酱',
+ 'uploader_id': '532896',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2202584',
+ 'ext': 'mp4',
+ 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
+ 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
+ 'duration': 66,
+ 'timestamp': 1633453402,
+ 'upload_date': '20211005',
+ 'modified_timestamp': 1633453402,
+ 'modified_date': '20211005',
+ 'uploader': '乌酱',
+ 'uploader_id': '532896',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.taptap.cn/moment/521630629209573493',
+ 'info_dict': {
+ 'id': '521630629209573493',
+ 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
+ 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
+ 'timestamp': 1711425600,
+ 'upload_date': '20240326',
+ 'modified_timestamp': 1711425600,
+ 'modified_date': '20240326',
+ 'uploader': '崩坏:星穹铁道',
+ 'uploader_id': '414732580',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '4006511',
+ 'ext': 'mp4',
+ 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
+ 'description': 'md5:2c81245da864428c904d53ae4ad2182b',
+ 'duration': 173,
+ 'timestamp': 1711425600,
+ 'upload_date': '20240326',
+ 'modified_timestamp': 1711425600,
+ 'modified_date': '20240326',
+ 'uploader': '崩坏:星穹铁道',
+ 'uploader_id': '414732580',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.taptap.cn/moment/540493587511511299',
+ 'playlist_count': 2,
+ 'info_dict': {
+ 'id': '540493587511511299',
+ 'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!',
+ 'description': 'md5:d60842350e686ddb242291ddfb8e39c9',
+ 'timestamp': 1715920200,
+ 'upload_date': '20240517',
+ 'modified_timestamp': 1715942225,
+ 'modified_date': '20240517',
+ 'uploader': 'TapTap 编辑',
+ 'uploader_id': '7159244',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapAppIE(TapTapBaseIE):
+ _VALID_URL = r'https?://www\.taptap\.cn/app/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail'
+ _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
+ _META_PATH = {
+ 'title': ('title', {str}),
+ 'description': ('description', 'text', {str}, {clean_html}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.cn/app/168332',
+ 'info_dict': {
+ 'id': '168332',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ },
+ 'playlist_count': 2,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '4058443',
+ 'ext': 'mp4',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ 'duration': 26,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '4058462',
+ 'ext': 'mp4',
+ 'title': '原神',
+ 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
+ 'duration': 295,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapIntlBase(TapTapBaseIE):
+ _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0'
+ _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get'
+
+
+class TapTapAppIntlIE(TapTapIntlBase):
+ _VALID_URL = r'https?://www\.taptap\.io/app/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail'
+ _DATA_PATH = 'app'
+ _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
+ _META_PATH = {
+ 'title': ('title', {str}),
+ 'description': ('description', 'text', {str}, {clean_html}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.io/app/233287',
+ 'info_dict': {
+ 'id': '233287',
+ 'title': '《虹彩六號 M》',
+ 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2149708997',
+ 'ext': 'mp4',
+ 'title': '《虹彩六號 M》',
+ 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
+ 'duration': 78,
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+
+class TapTapPostIntlIE(TapTapIntlBase):
+ _VALID_URL = r'https?://www\.taptap\.io/post/(?P<id>\d+)'
+ _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail'
+ _INFO_QUERY_KEY = 'id_str'
+ _DATA_PATH = 'post'
+ _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id')
+ _META_PATH = {
+ 'timestamp': ('published_time', {int_or_none}),
+ 'modified_timestamp': ('edited_time', {int_or_none}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_id': ('user', 'id', {int}, {str_or_none}),
+ 'title': ('title', {str}),
+ 'description': ('list_fields', 'summary', {str}),
+ }
+ _TESTS = [{
+ 'url': 'https://www.taptap.io/post/571785',
+ 'info_dict': {
+ 'id': '571785',
+ 'title': 'Arknights x Rainbow Six Siege | Event PV',
+ 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
+ 'timestamp': 1614664951,
+ 'upload_date': '20210302',
+ 'modified_timestamp': 1614664951,
+ 'modified_date': '20210302',
+ 'uploader': 'TapTap Editor',
+ 'uploader_id': '80224473',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2149491903',
+ 'ext': 'mp4',
+ 'title': 'Arknights x Rainbow Six Siege | Event PV',
+ 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
+ 'duration': 122,
+ 'timestamp': 1614664951,
+ 'upload_date': '20210302',
+ 'modified_timestamp': 1614664951,
+ 'modified_date': '20210302',
+ 'uploader': 'TapTap Editor',
+ 'uploader_id': '80224473',
+ 'thumbnail': r're:^https?://.*\.(png|jpg)',
+ }
+ }],
+ 'params': {'skip_download': 'm3u8'},
+ }]
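
The new TapTap extractors are declarative: TapTapBaseIE does all the fetching, while each subclass only sets endpoint constants and traverse_obj paths (_ID_PATH collects video IDs, _META_PATH maps API fields onto info-dict keys). A sketch of how such paths behave, with invented sample data shaped like the moment API response:

    from yt_dlp.utils import int_or_none, str_or_none, traverse_obj

    data = {'moment': {
        'created_time': 1633453402,
        'author': {'user': {'name': '乌酱', 'id': 532896}},
        'topic': {'title': 'Example', 'videos': [{'video_id': 2202584}],
                  'pin_video': {'video_id': 2202584}},
    }}

    ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id')
    META_PATH = ('moment', {
        'timestamp': ('created_time', {int_or_none}),
        'uploader': ('author', 'user', 'name', {str}),
        'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}),
        'title': ('topic', 'title', {str}),
    })

    # Branching paths return every match; duplicates collapse via set() as in _real_extract
    print(set(traverse_obj(data, ID_PATH)))  # {2202584}
    print(traverse_obj(data, META_PATH))
    # {'timestamp': 1633453402, 'uploader': '乌酱', 'uploader_id': '532896', 'title': 'Example'}
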
diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py
index 808c6c7..4e17859 100644
--- a/yt_dlp/extractor/tbs.py
+++ b/yt_dlp/extractor/tbs.py
@@ -2,8 +2,8 @@ import re
from .turner import TurnerBaseIE
from ..compat import (
- compat_urllib_parse_urlparse,
compat_parse_qs,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
float_or_none,
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
index 5eac9aa..778fa12 100644
--- a/yt_dlp/extractor/teachable.py
+++ b/yt_dlp/extractor/teachable.py
@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from .wistia import WistiaIE
from ..utils import (
- clean_html,
ExtractorError,
- int_or_none,
+ clean_html,
get_element_by_class,
+ int_or_none,
strip_or_none,
urlencode_postdata,
urljoin,
diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py
index 90a9762..7402409 100644
--- a/yt_dlp/extractor/teachertube.py
+++ b/yt_dlp/extractor/teachertube.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
qualities,
)
diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py
index d32f812..3fb899c 100644
--- a/yt_dlp/extractor/teamcoco.py
+++ b/yt_dlp/extractor/teamcoco.py
@@ -13,8 +13,8 @@ from ..utils import (
parse_qs,
traverse_obj,
unified_timestamp,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py
index dd802db..ba25cdc 100644
--- a/yt_dlp/extractor/teamtreehouse.py
+++ b/yt_dlp/extractor/teamtreehouse.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
float_or_none,
get_element_by_class,
get_element_by_id,
diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py
index c28a154..0969bbb 100644
--- a/yt_dlp/extractor/ted.py
+++ b/yt_dlp/extractor/ted.py
@@ -2,14 +2,13 @@ import itertools
import re
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
+ parse_duration,
str_to_int,
try_get,
- url_or_none,
unified_strdate,
- parse_duration,
+ url_or_none,
)
diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py
index 212af37..1705c2d 100644
--- a/yt_dlp/extractor/tele13.py
+++ b/yt_dlp/extractor/tele13.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
+ determine_ext,
js_to_json,
qualities,
- determine_ext,
)
diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py
index 72f67e4..a455375 100644
--- a/yt_dlp/extractor/tele5.py
+++ b/yt_dlp/extractor/tele5.py
@@ -1,89 +1,77 @@
-from .dplay import DPlayIE
-from ..compat import compat_urlparse
-from ..utils import (
- ExtractorError,
- extract_attributes,
-)
+import functools
+from .dplay import DiscoveryPlusBaseIE
+from ..utils import join_nonempty
+from ..utils.traversal import traverse_obj
-class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
- _WORKING = False
- _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _GEO_COUNTRIES = ['DE']
+
+class Tele5IE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?'
_TESTS = [{
- 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+ # slug_a and slug_b
+ 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane',
'info_dict': {
- 'id': '1549416',
+ 'id': '6852024',
'ext': 'mp4',
- 'upload_date': '20180814',
- 'timestamp': 1534290623,
- 'title': 'Pandorum',
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'Quarantäne',
+ 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675',
+ 'episode': 'Episode 73',
+ 'episode_number': 73,
+ 'season': 'Season 4',
+ 'season_number': 4,
+ 'series': 'Stargate Atlantis',
+ 'upload_date': '20240525',
+ 'timestamp': 1716643200,
+ 'duration': 2503.2,
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg',
+ 'creators': ['Tele5'],
+ 'tags': [],
},
- 'skip': 'No longer available: "404 Seite nicht gefunden"',
}, {
- # jwplatform, nexx unavailable
- 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+ # only slug_a
+ 'url': 'https://tele5.de/mediathek/inside-out',
'info_dict': {
- 'id': 'WJuiOlUp',
+ 'id': '6819502',
'ext': 'mp4',
- 'upload_date': '20200603',
- 'timestamp': 1591214400,
- 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
- 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+ 'title': 'Inside out',
+ 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd',
+ 'series': 'Inside out',
+ 'upload_date': '20240523',
+ 'timestamp': 1716494400,
+ 'duration': 5343.4,
+ 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg',
+ 'creators': ['Tele5'],
+ 'tags': [],
},
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'No longer available, redirects to Filme page',
}, {
- 'url': 'https://tele5.de/mediathek/angel-of-mine/',
+ # playlist
+ 'url': 'https://tele5.de/mediathek/schlefaz',
'info_dict': {
- 'id': '1252360',
- 'ext': 'mp4',
- 'upload_date': '20220109',
- 'timestamp': 1641762000,
- 'title': 'Angel of Mine',
- 'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
+ 'id': 'mediathek-schlefaz',
},
- 'params': {
- 'format': 'bestvideo',
- },
- }, {
- 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/anders-ist-sevda/',
- 'only_matching': True,
+ 'playlist_mincount': 3,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
- player_info = extract_attributes(player_element)
- asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
- endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
- source_type = player_info.get('sourcetype')
- if source_type:
- endpoint = '%s-%s' % (source_type, endpoint)
- try:
- return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
- except ExtractorError as e:
- if getattr(e, 'message', '') == 'Missing deviceId in context':
- self.report_drm(video_id)
- raise
+ parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b')
+ playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-')
+
+ query = {'environment': 'tele5', 'v': '2'}
+ if not slug_b:
+ endpoint = f'page/{slug_a}'
+ query['parent_slug'] = parent_slug
+ else:
+ endpoint = f'videos/{slug_b}'
+ query['filter[show.slug]'] = slug_a
+ cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query)
+
+ return self.playlist_result(map(
+ functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'),
+ traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm}',
+ 'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
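
The rewritten Tele5IE resolves pages in two steps: a Loma CMS call (page/<slug_a> scoped by parent_slug for overview pages, videos/<slug_b> filtered by show slug for single episodes) yields videoId values, which are then fed to the inherited Discovery+ machinery via _get_disco_api_info. A sketch of the endpoint selection, derived directly from the code above; the slugs are illustrative:

    def build_cms_request(parent_slug, slug_a, slug_b=None):
        query = {'environment': 'tele5', 'v': '2'}
        if not slug_b:  # show/overview page, e.g. /mediathek/schlefaz
            endpoint = f'page/{slug_a}'
            query['parent_slug'] = parent_slug
        else:           # single episode, e.g. /mediathek/stargate-atlantis/quarantane
            endpoint = f'videos/{slug_b}'
            query['filter[show.slug]'] = slug_a
        return f'https://de-api.loma-cms.com/feloma/{endpoint}/', query

    print(build_cms_request('mediathek', 'schlefaz'))
    print(build_cms_request('mediathek', 'stargate-atlantis', 'quarantane'))
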
diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py
index 5fdcddd..380c84d 100644
--- a/yt_dlp/extractor/telewebion.py
+++ b/yt_dlp/extractor/telewebion.py
@@ -1,4 +1,5 @@
from __future__ import annotations
+
import functools
import json
import textwrap
diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py
index 9318d6f..71e54eb 100644
--- a/yt_dlp/extractor/tempo.py
+++ b/yt_dlp/extractor/tempo.py
@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
traverse_obj,
- try_call
+ try_call,
)
diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py
index 6618ea4..ae2cb48 100644
--- a/yt_dlp/extractor/tencent.py
+++ b/yt_dlp/extractor/tencent.py
@@ -8,8 +8,8 @@ from .common import InfoExtractor
from ..aes import aes_cbc_encrypt_bytes
from ..utils import (
ExtractorError,
- float_or_none,
determine_ext,
+ float_or_none,
int_or_none,
js_to_json,
traverse_obj,
diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py
index a231ecc..fb64077 100644
--- a/yt_dlp/extractor/theguardian.py
+++ b/yt_dlp/extractor/theguardian.py
@@ -10,7 +10,7 @@ from ..utils import (
parse_qs,
traverse_obj,
unified_strdate,
- urljoin
+ urljoin,
)
diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py
index a991a4d..99f0d42 100644
--- a/yt_dlp/extractor/theintercept.py
+++ b/yt_dlp/extractor/theintercept.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- parse_iso8601,
- int_or_none,
ExtractorError,
+ int_or_none,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py
index 9160f5e..eeb33a6 100644
--- a/yt_dlp/extractor/theplatform.py
+++ b/yt_dlp/extractor/theplatform.py
@@ -1,29 +1,27 @@
-import re
-import time
-import hmac
import binascii
import hashlib
+import hmac
+import re
+import time
-
-from .once import OnceIE
from .adobepass import AdobePassIE
-from ..networking import Request
+from .once import OnceIE
+from ..networking import HEADRequest, Request
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
+ find_xpath_attr,
float_or_none,
int_or_none,
- parse_qs,
- unsmuggle_url,
- update_url_query,
- xpath_with_ns,
mimetype2ext,
- find_xpath_attr,
+ parse_qs,
traverse_obj,
+ unsmuggle_url,
update_url,
+ update_url_query,
urlhandle_detect_ext,
+ xpath_with_ns,
)
-from ..networking import HEADRequest
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py
index 9d3368e..04b0838 100644
--- a/yt_dlp/extractor/thisvid.py
+++ b/yt_dlp/extractor/thisvid.py
@@ -134,7 +134,7 @@ class ThisVidPlaylistBaseIE(InfoExtractor):
title = re.split(
r'(?i)\s*\|\s*ThisVid\.com\s*$',
self._og_search_title(webpage, default=None)
- or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None
+ or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', maxsplit=1)[0] or None
return self.playlist_from_matches(
self._generate_playlist_entries(url, playlist_id, webpage),
diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py
index 7841f8d..f7a13d2 100644
--- a/yt_dlp/extractor/threeqsdn.py
+++ b/yt_dlp/extractor/threeqsdn.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
int_or_none,
join_nonempty,
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 3f5261a..ab8efc1 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1,3 +1,4 @@
+import functools
import itertools
import json
import random
@@ -11,14 +12,16 @@ from ..compat import compat_urllib_parse_urlparse
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
- LazyList,
UnsupportedError,
UserNotLive,
determine_ext,
+ filter_dict,
format_field,
int_or_none,
join_nonempty,
merge_dicts,
+ mimetype2ext,
+ parse_qs,
qualities,
remove_start,
srt_subtitles_timecode,
@@ -45,19 +48,28 @@ class TikTokBaseIE(InfoExtractor):
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
'aid': '0',
}
- _KNOWN_APP_INFO = [
- '7351144126450059040',
- '7351149742343391009',
- '7351153174894626592',
- ]
_APP_INFO_POOL = None
_APP_INFO = None
_APP_USER_AGENT = None
- @property
+ @functools.cached_property
+ def _KNOWN_APP_INFO(self):
+ # If we have a genuine device ID, we may not need any IID
+ default = [''] if self._KNOWN_DEVICE_ID else []
+ return self._configuration_arg('app_info', default, ie_key=TikTokIE)
+
+ @functools.cached_property
+ def _KNOWN_DEVICE_ID(self):
+ return self._configuration_arg('device_id', [None], ie_key=TikTokIE)[0]
+
+ @functools.cached_property
+ def _DEVICE_ID(self):
+ return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000))
+
+ @functools.cached_property
def _API_HOSTNAME(self):
return self._configuration_arg(
- 'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
+ 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]
def _get_next_app_info(self):
if self._APP_INFO_POOL is None:
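All three cached properties above are fed by yt-dlp's extractor-args mechanism, so each value can be overridden from the command line. A usage sketch (standard --extractor-args syntax; the values are placeholders, and each app_info entry is a '/'-separated tuple zipped positionally onto the _APP_INFO_DEFAULTS keys):

    yt-dlp --extractor-args "tiktok:device_id=7250000000000000001" URL
    yt-dlp --extractor-args "tiktok:app_info=7351144126450059040" URL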
@@ -66,13 +78,10 @@ class TikTokBaseIE(InfoExtractor):
for key, default in self._APP_INFO_DEFAULTS.items()
if key != 'iid'
}
- app_info_list = (
- self._configuration_arg('app_info', ie_key=TikTokIE)
- or random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO)))
self._APP_INFO_POOL = [
{**defaults, **dict(
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
- )} for app_info in app_info_list
+ )} for app_info in self._KNOWN_APP_INFO
]
if not self._APP_INFO_POOL:
@@ -119,7 +128,7 @@ class TikTokBaseIE(InfoExtractor):
}, query=query)
def _build_api_query(self, query):
- return {
+ return filter_dict({
**query,
'device_platform': 'android',
'os': 'android',
@@ -160,10 +169,10 @@ class TikTokBaseIE(InfoExtractor):
'build_number': self._APP_INFO['app_version'],
'region': 'US',
'ts': int(time.time()),
- 'iid': self._APP_INFO['iid'],
- 'device_id': random.randint(7250000000000000000, 7351147085025500000),
+ 'iid': self._APP_INFO.get('iid'),
+ 'device_id': self._DEVICE_ID,
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
- }
+ })
def _call_api(self, ep, query, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
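Wrapping the query in filter_dict is what makes 'iid' safely optional here: self._APP_INFO.get('iid') may now be None, and filter_dict's default predicate drops None-valued entries before the query string is built. A minimal sketch of that behaviour:

    from yt_dlp.utils import filter_dict

    query = filter_dict({'device_id': '7250000000000000001', 'iid': None})
    assert query == {'device_id': '7250000000000000001'}  # no 'iid=None' leaks into the URL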
@@ -203,7 +212,31 @@ class TikTokBaseIE(InfoExtractor):
raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
return self._parse_aweme_video_app(aweme_detail)
- def _get_subtitles(self, aweme_detail, aweme_id):
+ def _extract_web_data_and_status(self, url, video_id, fatal=True):
+ webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or ''
+ video_data, status = {}, None
+
+ if universal_data := self._get_universal_data(webpage, video_id):
+ self.write_debug('Found universal data for rehydration')
+ status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
+ video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
+
+ elif sigi_data := self._get_sigi_state(webpage, video_id):
+ self.write_debug('Found sigi state data')
+ status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
+ video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
+
+ elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
+ self.write_debug('Found next.js data')
+ status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
+ video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
+
+ elif fatal:
+ raise ExtractorError('Unable to extract webpage video data')
+
+ return video_data, status
+
+ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info
subtitles = {}
# aweme/detail endpoint subs
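The three probes in _extract_web_data_and_status cover TikTok's successive page-hydration formats, each an inline JSON blob in the HTML, tried newest-first. Roughly (the script ids and shapes are assumptions, not part of this patch):

    # <script id="__UNIVERSAL_DATA_FOR_REHYDRATION__">{..., "webapp.video-detail": {...}}</script>
    # <script id="SIGI_STATE">{"ItemModule": {...}, "VideoPage": {"statusCode": 0}}</script>
    # <script id="__NEXT_DATA__">{"props": {"pageProps": {"itemInfo": {"itemStruct": {...}}}}}</script>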
@@ -234,32 +267,32 @@ class TikTokBaseIE(InfoExtractor):
})
# webpage subs
if not subtitles:
- for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict):
- if not caption.get('Url'):
- continue
+ if user_name: # only _parse_aweme_video_app needs to extract the webpage here
+ aweme_detail, _ = self._extract_web_data_and_status(
+ self._create_url(user_name, aweme_id), aweme_id, fatal=False)
+ for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'],
})
return subtitles
+ def _parse_url_key(self, url_key):
+ format_id, codec, res, bitrate = self._search_regex(
+ r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
+ 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
+ if not format_id:
+ return {}, None
+ return {
+ 'format_id': format_id,
+ 'vcodec': 'h265' if codec == 'bytevc1' else codec,
+ 'tbr': int_or_none(bitrate, scale=1000) or None,
+ 'quality': qualities(self.QUALITIES)(res),
+ }, res
+
def _parse_aweme_video_app(self, aweme_detail):
aweme_id = aweme_detail['aweme_id']
video_info = aweme_detail['video']
-
- def parse_url_key(url_key):
- format_id, codec, res, bitrate = self._search_regex(
- r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key,
- 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate'))
- if not format_id:
- return {}, None
- return {
- 'format_id': format_id,
- 'vcodec': 'h265' if codec == 'bytevc1' else codec,
- 'tbr': int_or_none(bitrate, scale=1000) or None,
- 'quality': qualities(self.QUALITIES)(res),
- }, res
-
known_resolutions = {}
def audio_meta(url):
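The hoisted _parse_url_key recovers codec, resolution and bitrate from TikTok's url_key naming scheme. A worked example against the regex above (the key itself is a made-up illustration):

    # url_key = 'v0300fg10000_bytevc1_720p_1200000'
    #   id      -> 'bytevc1_720p_1200000'   (becomes format_id)
    #   codec   -> 'bytevc1'                (remapped to vcodec 'h265')
    #   res     -> '720p'                   (ranked via qualities(self.QUALITIES))
    #   bitrate -> '1200000'                (tbr = 1200000 // 1000 = 1200 kbps)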
@@ -274,7 +307,7 @@ class TikTokBaseIE(InfoExtractor):
} if ext == 'mp3' or '-music-' in url else {}
def extract_addr(addr, add_meta={}):
- parsed_meta, res = parse_url_key(addr.get('url_key', ''))
+ parsed_meta, res = self._parse_url_key(addr.get('url_key', ''))
is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2'
if res:
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
@@ -288,7 +321,7 @@ class TikTokBaseIE(InfoExtractor):
'acodec': 'aac',
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
**add_meta, **parsed_meta,
- # bytevc2 is bytedance's proprietary (unplayable) video codec
+                # bytevc2 is bytedance's own custom h266/vvc codec, as yet unplayable
'preference': -100 if is_bytevc2 else -1,
'format_note': join_nonempty(
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None,
@@ -300,6 +333,7 @@ class TikTokBaseIE(InfoExtractor):
formats = []
width = int_or_none(video_info.get('width'))
height = int_or_none(video_info.get('height'))
+ ratio = try_call(lambda: width / height) or 0.5625
if video_info.get('play_addr'):
formats.extend(extract_addr(video_info['play_addr'], {
'format_id': 'play_addr',
@@ -316,8 +350,8 @@ class TikTokBaseIE(InfoExtractor):
'format_id': 'download_addr',
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
'vcodec': 'h264',
- 'width': dl_width or width,
- 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
+ 'width': dl_width,
+ 'height': try_call(lambda: int(dl_width / ratio)), # download_addr['height'] is wrong
'preference': -2 if video_info.get('has_watermark') else -1,
}))
if video_info.get('play_addr_h264'):
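The download_addr height is now derived from the clip's real aspect ratio instead of the hard-coded 9:16 value (0.5625), which only held for portrait uploads. Worked arithmetic:

    # portrait:  width=540, height=960 -> ratio = 540/960 = 0.5625
    #            dl_width=540 => height = int(540 / 0.5625) = 960   (same result as before)
    # landscape: ratio = 16/9, dl_width=1280
    #            => height = int(1280 / (16/9)) = 720; the old constant gave int(1280 / 0.5625) = 2275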
@@ -360,11 +394,7 @@ class TikTokBaseIE(InfoExtractor):
})
stats_info = aweme_detail.get('statistics') or {}
- author_info = aweme_detail.get('author') or {}
music_info = aweme_detail.get('music') or {}
- user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
- 'sec_uid', 'id', 'uid', 'unique_id',
- expected_type=str_or_none, get_all=False))
labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str)
contained_music_track = traverse_obj(
@@ -378,6 +408,13 @@ class TikTokBaseIE(InfoExtractor):
else:
music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str}))
+ author_info = traverse_obj(aweme_detail, ('author', {
+ 'uploader': ('unique_id', {str}),
+ 'uploader_id': ('uid', {str_or_none}),
+ 'channel': ('nickname', {str}),
+ 'channel_id': ('sec_uid', {str}),
+ }))
+
return {
'id': aweme_id,
**traverse_obj(aweme_detail, {
@@ -391,21 +428,20 @@ class TikTokBaseIE(InfoExtractor):
'repost_count': 'share_count',
'comment_count': 'comment_count',
}, expected_type=int_or_none),
- **traverse_obj(author_info, {
- 'uploader': ('unique_id', {str}),
- 'uploader_id': ('uid', {str_or_none}),
- 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
- 'channel': ('nickname', {str}),
- 'channel_id': ('sec_uid', {str}),
- }),
- 'uploader_url': user_url,
+ **author_info,
+ 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None),
+ 'uploader_url': format_field(
+ author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None),
'track': music_track,
'album': str_or_none(music_info.get('album')) or None,
'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
'formats': formats,
- 'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
+ 'subtitles': self.extract_subtitles(
+ aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')),
'thumbnails': thumbnails,
- 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000),
+ 'duration': (traverse_obj(video_info, (
+ (None, 'download_addr'), 'duration', {functools.partial(int_or_none, scale=1000)}, any))
+ or traverse_obj(music_info, ('duration', {int_or_none}))),
'availability': self._availability(
is_private='Private' in labels,
needs_subscription='Friends only' in labels,
@@ -413,78 +449,136 @@ class TikTokBaseIE(InfoExtractor):
'_format_sort_fields': ('quality', 'codec', 'size', 'br'),
}
- def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
- video_info = aweme_detail['video']
- author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={})
- music_info = aweme_detail.get('music') or {}
- stats_info = aweme_detail.get('stats') or {}
- channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False)
- user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None
-
+ def _extract_web_formats(self, aweme_detail):
+ COMMON_FORMAT_INFO = {
+ 'ext': 'mp4',
+ 'vcodec': 'h264',
+ 'acodec': 'aac',
+ }
+ video_info = traverse_obj(aweme_detail, ('video', {dict})) or {}
+ play_width = int_or_none(video_info.get('width'))
+ play_height = int_or_none(video_info.get('height'))
+ ratio = try_call(lambda: play_width / play_height) or 0.5625
formats = []
- width = int_or_none(video_info.get('width'))
- height = int_or_none(video_info.get('height'))
+
+ for bitrate_info in traverse_obj(video_info, ('bitrateInfo', lambda _, v: v['PlayAddr']['UrlList'])):
+ format_info, res = self._parse_url_key(
+ traverse_obj(bitrate_info, ('PlayAddr', 'UrlKey', {str})) or '')
+            # bytevc2 is bytedance's own custom h266/vvc codec, as yet unplayable
+ is_bytevc2 = format_info.get('vcodec') == 'bytevc2'
+ format_info.update({
+ 'format_note': 'UNPLAYABLE' if is_bytevc2 else None,
+ 'preference': -100 if is_bytevc2 else -1,
+ 'filesize': traverse_obj(bitrate_info, ('PlayAddr', 'DataSize', {int_or_none})),
+ })
+
+ if dimension := (res and int(res[:-1])):
+ if dimension == 540: # '540p' is actually 576p
+ dimension = 576
+ if ratio < 1: # portrait: res/dimension is width
+ y = int(dimension / ratio)
+ format_info.update({
+ 'width': dimension,
+ 'height': y - (y % 2),
+ })
+ else: # landscape: res/dimension is height
+ x = int(dimension * ratio)
+ format_info.update({
+ 'width': x + (x % 2),
+ 'height': dimension,
+ })
+
+ for video_url in traverse_obj(bitrate_info, ('PlayAddr', 'UrlList', ..., {url_or_none})):
+ formats.append({
+ **COMMON_FORMAT_INFO,
+ **format_info,
+ 'url': self._proto_relative_url(video_url),
+ })
+
+ # We don't have res string for play formats, but need quality for sorting & de-duplication
+ play_quality = traverse_obj(formats, (lambda _, v: v['width'] == play_width, 'quality', any))
for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
formats.append({
+ **COMMON_FORMAT_INFO,
+ 'format_id': 'play',
'url': self._proto_relative_url(play_url),
- 'ext': 'mp4',
- 'width': width,
- 'height': height,
+ 'width': play_width,
+ 'height': play_height,
+ 'quality': play_quality,
})
for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
formats.append({
+ **COMMON_FORMAT_INFO,
'format_id': 'download',
'url': self._proto_relative_url(download_url),
- 'ext': 'mp4',
- 'width': width,
- 'height': height,
})
self._remove_duplicate_formats(formats)
- thumbnails = []
- for thumb_url in traverse_obj(aweme_detail, (
- (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
- thumbnails.append({
- 'url': self._proto_relative_url(thumb_url),
- 'width': width,
- 'height': height,
+ for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
+ f.update({
+ 'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
+ 'preference': f.get('preference') or -2,
+ })
+
+ # Is it a slideshow with only audio for download?
+ if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
+ audio_url = aweme_detail['music']['playUrl']
+ ext = traverse_obj(parse_qs(audio_url), (
+ 'mime_type', -1, {lambda x: x.replace('_', '/')}, {mimetype2ext})) or 'm4a'
+ formats.append({
+ 'format_id': 'audio',
+ 'url': self._proto_relative_url(audio_url),
+ 'ext': ext,
+ 'acodec': 'aac' if ext == 'm4a' else ext,
+ 'vcodec': 'none',
})
+ return formats
+
+ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
+ author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {
+ 'channel': ('nickname', {str}),
+ 'channel_id': (('authorSecId', 'secUid'), {str}),
+ 'uploader': (('uniqueId', 'author'), {str}),
+ 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
+ }), get_all=False)
+
return {
'id': video_id,
+ 'formats': None if extract_flat else self._extract_web_formats(aweme_detail),
+ 'subtitles': None if extract_flat else self.extract_subtitles(aweme_detail, video_id, None),
+ 'http_headers': {'Referer': webpage_url},
+ **author_info,
+ 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None),
+ 'uploader_url': format_field(
+ author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None),
+ **traverse_obj(aweme_detail, ('music', {
+ 'track': ('title', {str}),
+ 'album': ('album', {str}, {lambda x: x or None}),
+ 'artists': ('authorName', {str}, {lambda x: re.split(r'(?:, | & )', x) if x else None}),
+ 'duration': ('duration', {int_or_none}),
+ })),
**traverse_obj(aweme_detail, {
'title': ('desc', {str}),
'description': ('desc', {str}),
- 'duration': ('video', 'duration', {int_or_none}),
+ # audio-only slideshows have a video duration of 0 and an actual audio duration
+ 'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}),
'timestamp': ('createTime', {int_or_none}),
}),
- **traverse_obj(author_info or aweme_detail, {
- 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
- 'channel': ('nickname', {str}),
- 'uploader': (('uniqueId', 'author'), {str}),
- 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
- }, get_all=False),
- **traverse_obj(stats_info, {
+ **traverse_obj(aweme_detail, ('stats', {
'view_count': 'playCount',
'like_count': 'diggCount',
'repost_count': 'shareCount',
'comment_count': 'commentCount',
- }, expected_type=int_or_none),
- **traverse_obj(music_info, {
- 'track': ('title', {str}),
- 'album': ('album', {str}, {lambda x: x or None}),
- 'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
- }),
- 'channel_id': channel_id,
- 'uploader_url': user_url,
- 'formats': formats,
- 'thumbnails': thumbnails,
- 'http_headers': {
- 'Referer': webpage_url,
- }
+ }), expected_type=int_or_none),
+ 'thumbnails': traverse_obj(aweme_detail, (
+ (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {
+ 'url': ({url_or_none}, {self._proto_relative_url}),
+ },
+ )),
}
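The dimension reconstruction in _extract_web_formats is what the updated test expectations below reflect (576x1024 where 540x960 used to be): TikTok's '540p' label really denotes 576 lines, and for portrait video the labelled dimension is the width. A worked pass for a 9:16 clip:

    # res = '540p' -> dimension = 540, remapped to 576
    # ratio = play_width / play_height = 576 / 1024 = 0.5625 < 1   (portrait)
    # width = 576;  y = int(576 / 0.5625) = 1024;  height = 1024 - (1024 % 2) = 1024
    # landscape instead treats the label as height: x = int(576 * 16/9) = 1024 -> 1024x576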
@@ -521,21 +615,21 @@ class TikTokIE(TikTokBaseIE):
'skip': '404 Not Found',
}, {
'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
- 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
+ 'md5': 'f21112672ee4ce05ca390fb6522e1b6f',
'info_dict': {
'id': '6742501081818877190',
'ext': 'mp4',
'title': 'md5:5e2a23877420bb85ce6521dbee39ba94',
'description': 'md5:5e2a23877420bb85ce6521dbee39ba94',
'duration': 27,
- 'height': 960,
- 'width': 540,
+ 'height': 1024,
+ 'width': 576,
'uploader': 'patrox',
'uploader_id': '18702747',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
+ 'uploader_url': 'https://www.tiktok.com/@patrox',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
'channel': 'patroX',
- 'creators': ['patroX'],
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'upload_date': '20190930',
'timestamp': 1569860870,
@@ -547,7 +641,7 @@ class TikTokIE(TikTokBaseIE):
'track': 'Big Fun',
},
}, {
- # Banned audio, only available on the app
+        # Banned audio; previously only available via the app, now also works via the web
'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
'info_dict': {
'id': '6984138651336838402',
@@ -556,9 +650,9 @@ class TikTokIE(TikTokBaseIE):
'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
'uploader': 'barudakhb_',
'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
- 'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
'uploader_id': '6974687867511718913',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
+ 'uploader_url': 'https://www.tiktok.com/@barudakhb_',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'track': 'Boka Dance',
'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
@@ -581,7 +675,6 @@ class TikTokIE(TikTokBaseIE):
'description': 'Slap and Run!',
'uploader': 'user440922249',
'channel': 'Slap And Run',
- 'creators': ['Slap And Run'],
'uploader_id': '7036055384943690754',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
@@ -595,7 +688,7 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'params': {'skip_download': True}, # XXX: unable to download video data: HTTP Error 403: Forbidden
+ 'skip': 'This video is unavailable',
}, {
# Video without title and description
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
@@ -606,9 +699,9 @@ class TikTokIE(TikTokBaseIE):
'description': '',
'uploader': 'pokemonlife22',
'channel': 'Pokemon',
- 'creators': ['Pokemon'],
'uploader_id': '6820838815978423302',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
+ 'uploader_url': 'https://www.tiktok.com/@pokemonlife22',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'track': 'original sound',
'timestamp': 1643714123,
@@ -653,13 +746,14 @@ class TikTokIE(TikTokBaseIE):
'title': 'TikTok video #7139980461132074283',
'description': '',
'channel': 'Antaura',
- 'creators': ['Antaura'],
'uploader': '_le_cannibale_',
'uploader_id': '6604511138619654149',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
+ 'uploader_url': 'https://www.tiktok.com/@_le_cannibale_',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'artists': ['nathan !'],
'track': 'grahamscott canon',
+ 'duration': 10,
'upload_date': '20220905',
'timestamp': 1662406249,
'view_count': int,
@@ -670,18 +764,18 @@ class TikTokIE(TikTokBaseIE):
},
}, {
# only available via web
- 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME
- 'md5': '6aba7fad816e8709ff2c149679ace165',
+ 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662',
+ 'md5': '4cdefa501ac8ac20bf04986e10916fea',
'info_dict': {
'id': '7206382937372134662',
'ext': 'mp4',
'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
'channel': 'MoxyPatch',
- 'creators': ['MoxyPatch'],
'uploader': 'moxypatch',
'uploader_id': '7039142049363379205',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
+ 'uploader_url': 'https://www.tiktok.com/@moxypatch',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
'artists': ['your worst nightmare'],
'track': 'original sound',
@@ -710,7 +804,6 @@ class TikTokIE(TikTokBaseIE):
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel': 'tate mcrae',
- 'creators': ['tate mcrae'],
'artists': ['tate mcrae'],
'track': 'original sound',
'upload_date': '20220609',
@@ -722,7 +815,7 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int,
'thumbnail': r're:^https://.+\.webp',
},
- 'skip': 'Unavailable via feed API, no formats available via web',
+ 'skip': 'Unavailable via feed API, only audio available via web',
}, {
# Slideshow, audio-only m4a format
'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594',
@@ -734,13 +827,14 @@ class TikTokIE(TikTokBaseIE):
'description': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ',
'uploader': 'hara_yoimiya',
'uploader_id': '6582536342634676230',
- 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
+ 'uploader_url': 'https://www.tiktok.com/@hara_yoimiya',
+ 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
'channel_id': 'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
- 'channel': 'лампочка',
- 'creators': ['лампочка'],
+ 'channel': 'лампочка(!)',
'artists': ['Øneheart'],
'album': 'watching the stars',
'track': 'watching the stars',
+ 'duration': 60,
'upload_date': '20230708',
'timestamp': 1688816612,
'view_count': int,
@@ -757,32 +851,16 @@ class TikTokIE(TikTokBaseIE):
def _real_extract(self, url):
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
- try:
- return self._extract_aweme_app(video_id)
- except ExtractorError as e:
- e.expected = True
- self.report_warning(f'{e}; trying with webpage')
-
- url = self._create_url(user_id, video_id)
- webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
-
- if universal_data := self._get_universal_data(webpage, video_id):
- self.write_debug('Found universal data for rehydration')
- status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
- video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
-
- elif sigi_data := self._get_sigi_state(webpage, video_id):
- self.write_debug('Found sigi state data')
- status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
- video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
- elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
- self.write_debug('Found next.js data')
- status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
- video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
+ if self._KNOWN_APP_INFO:
+ try:
+ return self._extract_aweme_app(video_id)
+ except ExtractorError as e:
+ e.expected = True
+ self.report_warning(f'{e}; trying with webpage')
- else:
- raise ExtractorError('Unable to extract webpage video data')
+ url = self._create_url(user_id, video_id)
+ video_data, status = self._extract_web_data_and_status(url, video_id)
if video_data and status == 0:
return self._parse_aweme_video_web(video_data, url, video_id)
@@ -793,102 +871,141 @@ class TikTokIE(TikTokBaseIE):
class TikTokUserIE(TikTokBaseIE):
IE_NAME = 'tiktok:user'
- _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
- _WORKING = False
+ _VALID_URL = r'(?:tiktokuser:|https?://(?:www\.)?tiktok\.com/@)(?P<id>[\w.-]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://tiktok.com/@corgibobaa?lang=en',
'playlist_mincount': 45,
'info_dict': {
- 'id': '6935371178089399301',
+ 'id': 'MS4wLjABAAAAepiJKgwWhulvCpSuUVsp7sgVVsFJbbNaLeQ6OQ0oAJERGDUIXhb2yxxHZedsItgT',
'title': 'corgibobaa',
- 'thumbnail': r're:https://.+_1080x1080\.webp'
},
- 'expected_warnings': ['Retrying']
}, {
'url': 'https://www.tiktok.com/@6820838815978423302',
'playlist_mincount': 5,
'info_dict': {
- 'id': '6820838815978423302',
+ 'id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'title': '6820838815978423302',
- 'thumbnail': r're:https://.+_1080x1080\.webp'
},
- 'expected_warnings': ['Retrying']
}, {
'url': 'https://www.tiktok.com/@meme',
'playlist_mincount': 593,
'info_dict': {
- 'id': '79005827461758976',
+ 'id': 'MS4wLjABAAAAiKfaDWeCsT3IHwY77zqWGtVRIy9v4ws1HbVi7auP1Vx7dJysU_hc5yRiGywojRD6',
'title': 'meme',
- 'thumbnail': r're:https://.+_1080x1080\.webp'
},
- 'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'tiktokuser:MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ',
+ 'playlist_mincount': 31,
+ 'info_dict': {
+ 'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ',
+ },
}]
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0'
+ _API_BASE_URL = 'https://www.tiktok.com/api/creator/item_list/'
- r''' # TODO: Fix by adding _signature to api_url
- def _entries(self, webpage, user_id, username):
- secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username)
- verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id')
- if not verifyfp_cookie:
- raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True)
- api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor='
- cursor = '0'
- for page in itertools.count():
- data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page)
- for video in data_json.get('itemList', []):
- video_id = video['id']
- video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
- yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc')))
- if not data_json.get('hasMore'):
- break
- cursor = data_json['cursor']
- '''
-
- def _video_entries_api(self, webpage, user_id, username):
- query = {
- 'user_id': user_id,
- 'count': 21,
- 'max_cursor': 0,
- 'min_cursor': 0,
- 'retry_type': 'no_retry',
- 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
+ def _build_web_query(self, sec_uid, cursor):
+ return {
+ 'aid': '1988',
+ 'app_language': 'en',
+ 'app_name': 'tiktok_web',
+ 'browser_language': 'en-US',
+ 'browser_name': 'Mozilla',
+ 'browser_online': 'true',
+ 'browser_platform': 'Win32',
+ 'browser_version': '5.0 (Windows)',
+ 'channel': 'tiktok_web',
+ 'cookie_enabled': 'true',
+ 'count': '15',
+ 'cursor': cursor,
+ 'device_id': self._DEVICE_ID,
+ 'device_platform': 'web_pc',
+ 'focus_state': 'true',
+ 'from_page': 'user',
+ 'history_len': '2',
+ 'is_fullscreen': 'false',
+ 'is_page_visible': 'true',
+ 'language': 'en',
+ 'os': 'windows',
+ 'priority_region': '',
+ 'referer': '',
+ 'region': 'US',
+ 'screen_height': '1080',
+ 'screen_width': '1920',
+ 'secUid': sec_uid,
+ 'type': '1', # pagination type: 0 == oldest-to-newest, 1 == newest-to-oldest
+ 'tz_name': 'UTC',
+ 'verifyFp': f'verify_{"".join(random.choices(string.hexdigits, k=7))}',
+ 'webcast_language': 'en',
}
+ def _entries(self, sec_uid, user_name):
+ display_id = user_name or sec_uid
+
+ cursor = int(time.time() * 1E3)
for page in itertools.count(1):
- for retry in self.RetryManager():
- try:
- post_list = self._call_api(
- 'aweme/post', query, username, note=f'Downloading user video list page {page}',
- errnote='Unable to download user video list')
- except ExtractorError as e:
- if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
- retry.error = e
- continue
- raise
- yield from post_list.get('aweme_list', [])
- if not post_list.get('has_more'):
+ response = self._download_json(
+ self._API_BASE_URL, display_id, f'Downloading page {page}',
+ query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT})
+
+ for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
+ video_id = video['id']
+ webpage_url = self._create_url(display_id, video_id)
+ yield self.url_result(
+ webpage_url, TikTokIE,
+ **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True))
+
+ old_cursor = cursor
+ cursor = traverse_obj(
+ response, ('itemList', -1, 'createTime', {functools.partial(int_or_none, invscale=1E3)}))
+ if not cursor:
+ # User may not have posted within this ~1 week lookback, so manually adjust cursor
+ cursor = old_cursor - 7 * 86_400_000
+ # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed
+ if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'):
break
- query['max_cursor'] = post_list['max_cursor']
-
- def _entries_api(self, user_id, videos):
- for video in videos:
- yield {
- **self._parse_aweme_video_app(video),
- 'extractor_key': TikTokIE.ie_key(),
- 'extractor': 'TikTok',
- 'webpage_url': f'https://tiktok.com/@{user_id}/video/{video["aweme_id"]}',
- }
- def _real_extract(self, url):
- user_name = self._match_id(url)
- webpage = self._download_webpage(url, user_name, headers={
- 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
- })
- user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name
+ def _get_sec_uid(self, user_url, user_name, msg):
+ webpage = self._download_webpage(
+ user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'},
+ note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or ''
+ return (traverse_obj(self._get_universal_data(webpage, user_name),
+ ('webapp.user-detail', 'userInfo', 'user', 'secUid', {str}))
+ or traverse_obj(self._get_sigi_state(webpage, user_name),
+ ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}),
+ ('UserModule', 'users', ..., 'secUid', {str}, any)))
- videos = LazyList(self._video_entries_api(webpage, user_id, user_name))
- thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0))
+ def _real_extract(self, url):
+ user_name, sec_uid = self._match_id(url), None
+ if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name):
+ user_name, sec_uid = None, mobj.group(0)
+ else:
+ sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user')
+ or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live'))
+
+ if not sec_uid:
+ webpage = self._download_webpage(
+ f'https://www.tiktok.com/embed/@{user_name}', user_name,
+ note='Downloading user embed page', fatal=False) or ''
+ data = traverse_obj(self._search_json(
+ r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>',
+ webpage, 'data', user_name, default={}),
+ ('source', 'data', f'/embed/@{user_name}', {dict}))
+
+ for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})):
+ webpage_url = self._create_url(user_name, aweme_id)
+ video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False)
+ sec_uid = self._parse_aweme_video_web(
+ video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id')
+ if sec_uid:
+ break
+
+ if not sec_uid:
+ raise ExtractorError(
+ 'Unable to extract secondary user ID. If you are able to get the channel_id '
+ 'from a video posted by this user, try using "tiktokuser:channel_id" as the '
+ 'input URL (replacing `channel_id` with its actual value)', expected=True)
- return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail)
+ return self.playlist_result(self._entries(sec_uid, user_name), sec_uid, user_name)
class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
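The user-feed pagination above is time-based rather than offset-based: the cursor is a millisecond timestamp, each page's oldest createTime becomes the next cursor, and a window with no posts is skipped by stepping back one week by hand. A condensed sketch of the loop (fetch_page stands in for the _download_json call):

    def paginate(fetch_page, now_ms):
        cursor = now_ms
        while True:
            page = fetch_page(cursor)                # 'type': '1' => newest-to-oldest
            items = page.get('itemList') or []
            yield from items
            old_cursor = cursor
            cursor = items[-1]['createTime'] * 1000 if items else None
            if not cursor:                           # no posts in this window: jump back ~1 week
                cursor = old_cursor - 7 * 86_400_000
            if cursor < 1472706000000 or not page.get('hasMorePrevious'):
                break                                # predates TikTok, or no more history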
@@ -898,7 +1015,7 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul
'cursor': 0,
'count': 20,
'type': 5,
- 'device_id': ''.join(random.choices(string.digits, k=19))
+ 'device_id': self._DEVICE_ID,
}
for page in itertools.count(1):
@@ -1000,6 +1117,64 @@ class TikTokTagIE(TikTokBaseListIE):
return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id)
+class TikTokCollectionIE(TikTokBaseIE):
+ IE_NAME = 'tiktok:collection'
+ _VALID_URL = r'https?://www\.tiktok\.com/@(?P<user_id>[\w.-]+)/collection/(?P<title>[^/?#]+)-(?P<id>\d+)/?(?:[?#]|$)'
+ _TESTS = [{
+ # playlist should have exactly 9 videos
+ 'url': 'https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462',
+ 'info_dict': {
+ 'id': '7371330159376370462',
+            'title': 'imanoreotwe-count-test',
+ },
+        'playlist_count': 9,
+ }, {
+        # tests fetching multiple pages of a large collection
+ 'url': 'https://www.tiktok.com/@imanoreotwe/collection/%F0%9F%98%82-7111887189571160875',
+ 'info_dict': {
+ 'id': '7111887189571160875',
+            'title': 'imanoreotwe-%F0%9F%98%82',
+ },
+        'playlist_mincount': 100,
+ }]
+ _API_BASE_URL = 'https://www.tiktok.com/api/collection/item_list/'
+ _PAGE_COUNT = 30
+
+ def _build_web_query(self, collection_id, cursor):
+ return {
+ 'aid': '1988',
+ 'collectionId': collection_id,
+ 'count': self._PAGE_COUNT,
+ 'cursor': cursor,
+ 'sourceType': '113',
+ }
+
+ def _entries(self, collection_id):
+ cursor = 0
+ for page in itertools.count(1):
+ response = self._download_json(
+ self._API_BASE_URL, collection_id, f'Downloading page {page}',
+ query=self._build_web_query(collection_id, cursor))
+
+ for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])):
+ video_id = video['id']
+ author = traverse_obj(video, ('author', ('uniqueId', 'secUid', 'id'), {str}, any)) or '_'
+ webpage_url = self._create_url(author, video_id)
+ yield self.url_result(
+ webpage_url, TikTokIE,
+ **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True))
+
+ if not traverse_obj(response, 'hasMore'):
+ break
+ cursor += self._PAGE_COUNT
+
+ def _real_extract(self, url):
+ collection_id, title, user_name = self._match_valid_url(url).group('id', 'title', 'user_id')
+
+ return self.playlist_result(
+ self._entries(collection_id), collection_id, '-'.join((user_name, title)))
+
+
class DouyinIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
_TESTS = [{
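TikTokCollectionIE pages with a plain offset cursor instead (cursor += _PAGE_COUNT per request), and its _VALID_URL splits the path into three named groups; the first collection test above parses as:

    # https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462
    #   user_id -> 'imanoreotwe'
    #   title   -> 'count-test'
    #   id      -> '7371330159376370462'
    # playlist title: '-'.join((user_name, title)) -> 'imanoreotwe-count-test'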
@@ -1015,7 +1190,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel': '杨超越',
- 'creators': ['杨超越'],
'duration': 19,
'timestamp': 1620905839,
'upload_date': '20210513',
@@ -1040,7 +1214,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
'channel': '杨超越工作室',
- 'creators': ['杨超越工作室'],
'duration': 42,
'timestamp': 1625739481,
'upload_date': '20210708',
@@ -1065,7 +1238,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel': '杨超越',
- 'creators': ['杨超越'],
'duration': 17,
'timestamp': 1619098692,
'upload_date': '20210422',
@@ -1107,7 +1279,6 @@ class DouyinIE(TikTokBaseIE):
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel': '杨超越',
- 'creators': ['杨超越'],
'duration': 15,
'timestamp': 1621261163,
'upload_date': '20210517',
diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py
index aa7ee6c..ccb2ef8 100644
--- a/yt_dlp/extractor/toypics.py
+++ b/yt_dlp/extractor/toypics.py
@@ -1,6 +1,7 @@
-from .common import InfoExtractor
import re
+from .common import InfoExtractor
+
class ToypicsIE(InfoExtractor):
_WORKING = False
diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py
index 56e51fe..3bdeedd 100644
--- a/yt_dlp/extractor/triller.py
+++ b/yt_dlp/extractor/triller.py
@@ -14,8 +14,8 @@ from ..utils import (
traverse_obj,
unified_timestamp,
url_basename,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py
index 86f0990..efedac1 100644
--- a/yt_dlp/extractor/trueid.py
+++ b/yt_dlp/extractor/trueid.py
@@ -1,13 +1,13 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_age_limit,
traverse_obj,
unified_timestamp,
- url_or_none
+ url_or_none,
)
diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py
index a26bdca..f2d0c59 100644
--- a/yt_dlp/extractor/tumblr.py
+++ b/yt_dlp/extractor/tumblr.py
@@ -3,7 +3,7 @@ from ..utils import (
ExtractorError,
int_or_none,
traverse_obj,
- urlencode_postdata
+ urlencode_postdata,
)
diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py
index 630d84b..b27db87 100644
--- a/yt_dlp/extractor/turner.py
+++ b/yt_dlp/extractor/turner.py
@@ -3,17 +3,17 @@ import re
from .adobepass import AdobePassIE
from ..compat import compat_str
from ..utils import (
- fix_xml_ampersands,
- xpath_text,
- int_or_none,
+ ExtractorError,
determine_ext,
+ fix_xml_ampersands,
float_or_none,
+ int_or_none,
parse_duration,
- xpath_attr,
- update_url_query,
- ExtractorError,
strip_or_none,
+ update_url_query,
url_or_none,
+ xpath_attr,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py
index 7756aa3..9b19e79 100644
--- a/yt_dlp/extractor/tv2.py
+++ b/yt_dlp/extractor/tv2.py
@@ -3,10 +3,10 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
- int_or_none,
+ determine_ext,
float_or_none,
+ int_or_none,
js_to_json,
parse_iso8601,
remove_end,
diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py
index 9c0a111..cd35ff5 100644
--- a/yt_dlp/extractor/tv2hu.py
+++ b/yt_dlp/extractor/tv2hu.py
@@ -1,8 +1,8 @@
# encoding: utf-8
from .common import InfoExtractor
from ..utils import (
- traverse_obj,
UnsupportedError,
+ traverse_obj,
)
diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py
index a445fae..52ff230 100644
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@@ -2,85 +2,88 @@ import urllib.parse
from .common import InfoExtractor
from ..utils import (
+ clean_html,
determine_ext,
extract_attributes,
+ get_element_by_class,
+ get_element_html_by_class,
int_or_none,
- parse_duration,
- traverse_obj,
- try_get,
url_or_none,
)
+from ..utils.traversal import traverse_obj
class TV5MondePlusIE(InfoExtractor):
- IE_DESC = 'TV5MONDE+'
- _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+ IE_NAME = 'TV5MONDE'
+ _VALID_URL = r'https?://(?:www\.)?tv5monde\.com/tv/video/(?P<id>[^/?#]+)'
_TESTS = [{
- # movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/les-novices',
- 'md5': 'c86f60bf8b75436455b1b205f9745955',
+ # documentary
+ 'url': 'https://www.tv5monde.com/tv/video/65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
+ 'md5': 'd2a708902d3df230a357c99701aece05',
'info_dict': {
- 'id': 'ZX0ipMyFQq_6D4BA7b',
- 'display_id': 'les-novices',
+ 'id': '3FPa7JMu21_6D4BA7b',
+ 'display_id': '65931-baudouin-l-heritage-d-un-roi-baudouin-l-heritage-d-un-roi',
'ext': 'mp4',
- 'title': 'Les novices',
- 'description': 'md5:2e7c33ba3ad48dabfcc2a956b88bde2b',
- 'upload_date': '20230821',
- 'thumbnail': 'https://revoir.tv5monde.com/uploads/media/video_thumbnail/0738/60/01e952b7ccf36b7c6007ec9131588954ab651de9.jpeg',
- 'duration': 5177,
- 'episode': 'Les novices',
+ 'title': "Baudouin, l'héritage d'un roi",
+ 'thumbnail': 'https://psi.tv5monde.com/upsilon-images/960x540/6f/baudouin-f49c6b0e.jpg',
+ 'duration': 4842,
+ 'upload_date': '20240130',
+ 'timestamp': 1706641242,
+ 'episode': "BAUDOUIN, L'HERITAGE D'UN ROI",
+ 'description': 'md5:78125c74a5cac06d7743a2d09126edad',
+ 'series': "Baudouin, l'héritage d'un roi",
},
}, {
# series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/opj-les-dents-de-la-terre-2',
+ 'url': 'https://www.tv5monde.com/tv/video/52952-toute-la-vie-mardi-23-mars-2021',
+ 'md5': 'f5e09637cadd55639c05874e22eb56bf',
'info_dict': {
- 'id': 'wJ0eeEPozr_6D4BA7b',
- 'display_id': 'opj-les-dents-de-la-terre-2',
+ 'id': 'obRRZ8m6g9_6D4BA7b',
+ 'display_id': '52952-toute-la-vie-mardi-23-mars-2021',
'ext': 'mp4',
- 'title': "OPJ - Les dents de la Terre (2)",
- 'description': 'md5:288f87fd68d993f814e66e60e5302d9d',
- 'upload_date': '20230823',
- 'series': 'OPJ',
- 'episode': 'Les dents de la Terre (2)',
- 'duration': 2877,
- 'thumbnail': 'https://dl-revoir.tv5monde.com/images/1a/5753448.jpg'
+ 'title': 'Toute la vie',
+ 'description': 'md5:a824a2e1dfd94cf45fa379a1fb43ce65',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5880553.jpg',
+ 'duration': 2526,
+ 'upload_date': '20230721',
+ 'timestamp': 1689971646,
+ 'series': 'Toute la vie',
+ 'episode': 'Mardi 23 mars 2021',
},
}, {
# movie
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
- 'md5': '32fa0cde16a4480d1251502a66856d5f',
+ 'url': 'https://www.tv5monde.com/tv/video/8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
+ 'md5': '87cefc34e10a6bf4f7823cccd7b36eb2',
'info_dict': {
- 'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
- 'display_id': 'ceux-qui-travaillent',
+ 'id': 'DOcfvdLKXL_6D4BA7b',
+ 'display_id': '8771-ce-fleuve-qui-nous-charrie-ce-fleuve-qui-nous-charrie-p001-ce-fleuve-qui-nous-charrie',
'ext': 'mp4',
- 'title': 'Ceux qui travaillent',
- 'description': 'md5:570e8bb688036ace873b2d50d24c026d',
- 'upload_date': '20210819',
+ 'title': 'Ce fleuve qui nous charrie',
+ 'description': 'md5:62ba3f875343c7fc4082bdfbbc1be992',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/5476617.jpg',
+ 'duration': 5300,
+ 'upload_date': '20210822',
+ 'timestamp': 1629594105,
+ 'episode': 'CE FLEUVE QUI NOUS CHARRIE-P001-CE FLEUVE QUI NOUS CHARRIE',
+ 'series': 'Ce fleuve qui nous charrie',
},
- 'skip': 'no longer available',
}, {
- # series episode
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
+ # news
+ 'url': 'https://www.tv5monde.com/tv/video/70402-tv5monde-le-journal-edition-du-08-05-24-11h',
+ 'md5': 'c62977d6d10754a2ecebba70ad370479',
'info_dict': {
- 'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
- 'display_id': 'vestiaires-caro-actrice',
+ 'id': 'LgQFrOCNsc_6D4BA7b',
+ 'display_id': '70402-tv5monde-le-journal-edition-du-08-05-24-11h',
'ext': 'mp4',
- 'title': "Vestiaires - Caro actrice",
- 'description': 'md5:db15d2e1976641e08377f942778058ea',
- 'upload_date': '20210819',
- 'series': "Vestiaires",
- 'episode': 'Caro actrice',
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'TV5MONDE, le journal',
+ 'description': 'md5:777dc209eaa4423b678477c36b0b04a8',
+ 'thumbnail': 'https://psi.tv5monde.com/media/image/960px/6184105.jpg',
+ 'duration': 854,
+ 'upload_date': '20240508',
+ 'timestamp': 1715159640,
+ 'series': 'TV5MONDE, le journal',
+ 'episode': 'EDITION DU 08/05/24 - 11H',
},
- 'skip': 'no longer available',
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
- 'only_matching': True,
- }, {
- 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
- 'only_matching': True,
}]
_GEO_BYPASS = False
@@ -98,7 +101,6 @@ class TV5MondePlusIE(InfoExtractor):
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
- title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
vpl_data = extract_attributes(self._search_regex(
r'(<[^>]+class="video_player_loader"[^>]+>)',
webpage, 'video player loader'))
@@ -147,26 +149,7 @@ class TV5MondePlusIE(InfoExtractor):
process_video_files(video_files)
metadata = self._parse_json(
- vpl_data['data-metadata'], display_id)
- duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
- or parse_duration(self._html_search_meta('duration', webpage)))
-
- description = self._html_search_regex(
- r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
- 'description', fatal=False)
-
- series = self._html_search_regex(
- r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
- 'series', default=None)
-
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- upload_date = self._search_regex(
- r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
- webpage, 'upload date', default=None)
- if upload_date:
- upload_date = upload_date.replace('_', '')
+ vpl_data.get('data-metadata') or '{}', display_id, fatal=False)
if not video_id:
video_id = self._search_regex(
@@ -175,16 +158,20 @@ class TV5MondePlusIE(InfoExtractor):
default=display_id)
return {
+ **traverse_obj(metadata, ('content', {
+ 'id': ('id', {str}),
+ 'title': ('title', {str}),
+ 'episode': ('title', {str}),
+ 'series': ('series', {str}),
+ 'timestamp': ('publishDate_ts', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ })),
'id': video_id,
'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': vpl_data.get('data-image'),
- 'duration': duration,
- 'upload_date': upload_date,
+ 'title': clean_html(get_element_by_class('main-title', webpage)),
+ 'description': clean_html(get_element_by_class('text', get_element_html_by_class('ep-summary', webpage) or '')),
+ 'thumbnail': url_or_none(vpl_data.get('data-image')),
'formats': formats,
'subtitles': self._extract_subtitles(self._parse_json(
traverse_obj(vpl_data, ('data-captions', {str}), default='{}'), display_id, fatal=False)),
- 'series': series,
- 'episode': episode,
}
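The rewritten return dict leans on traverse_obj's dict-mode: each target key maps to a path plus a {type} coercion, and keys whose resolved value is None are dropped rather than emitted. A minimal sketch (the metadata shape is assumed from the paths used above):

    from yt_dlp.utils import int_or_none
    from yt_dlp.utils.traversal import traverse_obj

    metadata = {'content': {'id': 'LgQFrOCNsc_6D4BA7b', 'duration': '854', 'publishDate_ts': 1715159640}}
    info = traverse_obj(metadata, ('content', {
        'id': ('id', {str}),
        'duration': ('duration', {int_or_none}),
        'timestamp': ('publishDate_ts', {int_or_none}),
    }))
    # -> {'id': 'LgQFrOCNsc_6D4BA7b', 'duration': 854, 'timestamp': 1715159640}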
diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py
index 9afe233..e3e1055 100644
--- a/yt_dlp/extractor/tva.py
+++ b/yt_dlp/extractor/tva.py
@@ -1,10 +1,9 @@
+import functools
+import re
+
from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- int_or_none,
- smuggle_url,
- strip_or_none,
-)
+from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
+from ..utils.traversal import traverse_obj
class TVAIE(InfoExtractor):
@@ -49,11 +48,20 @@ class QubIE(InfoExtractor):
'info_dict': {
'id': '6084352463001',
'ext': 'mp4',
- 'title': 'Épisode 01',
+ 'title': 'Ép 01. Mon dernier jour',
'uploader_id': '5481942443001',
'upload_date': '20190907',
'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'episode': 'Ép 01. Mon dernier jour',
+ 'episode_number': 1,
+ 'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
+ 'duration': 2625.963,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'series': 'Alerte Amber',
+ 'channel': 'TVA',
},
}, {
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
@@ -64,22 +72,24 @@ class QubIE(InfoExtractor):
def _real_extract(self, url):
entity_id = self._match_id(url)
- entity = self._download_json(
- 'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
- entity_id, query={'id': entity_id})
+ webpage = self._download_webpage(url, entity_id)
+ entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
video_id = entity['videoId']
episode = strip_or_none(entity.get('name'))
return {
'_type': 'url_transparent',
+ 'url': f'https://videos.tva.ca/details/_{video_id}',
+ 'ie_key': TVAIE.ie_key(),
'id': video_id,
'title': episode,
- # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
- 'url': 'https://videos.tva.ca/details/_' + video_id,
- 'description': entity.get('longDescription'),
- 'duration': float_or_none(entity.get('durationMillis'), 1000),
'episode': episode,
- 'episode_number': int_or_none(entity.get('episodeNumber')),
- # 'ie_key': 'BrightcoveNew',
- 'ie_key': TVAIE.ie_key(),
+ **traverse_obj(entity, {
+ 'description': ('longDescription', {str}),
+ 'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}),
+ 'channel': ('knownEntities', 'channel', 'name', {str}),
+ 'series': ('knownEntities', 'videoShow', 'name', {str}),
+ 'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}),
+ 'episode_number': ('episodeNumber', {int_or_none}),
+ }),
}
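The season_number path chains three steps inside traverse_obj: the lambda runs a regex over the slug, the following 1 indexes the resulting re.Match as group 1, and int_or_none coerces it. For a slug of the assumed shape:

    # slug = 'alerte-amber/saison-1/episode-1'   (illustrative)
    # re.search(r'/s(?:ai|ea)son-(\d+)/', slug)[1] -> '1' -> season_number = 1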
diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py
index b9f5e11..dbebda4 100644
--- a/yt_dlp/extractor/tvanouvelles.py
+++ b/yt_dlp/extractor/tvanouvelles.py
@@ -1,7 +1,7 @@
import re
-from .common import InfoExtractor
from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
class TVANouvellesIE(InfoExtractor):
diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py
index 5276813..ac48058 100644
--- a/yt_dlp/extractor/tvn24.py
+++ b/yt_dlp/extractor/tvn24.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
NO_DEFAULT,
+ int_or_none,
unescapeHTML,
)
diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py
index a8d00e2..f1ebf02 100644
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -4,10 +4,10 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
dict_get,
- ExtractorError,
int_or_none,
js_to_json,
str_or_none,
diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py
index 48a6efe..29185d3 100644
--- a/yt_dlp/extractor/tvplay.py
+++ b/yt_dlp/extractor/tvplay.py
@@ -4,8 +4,8 @@ from .common import InfoExtractor
from ..compat import compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_iso8601,
qualities,
diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py
index 228c236..d43bdc2 100644
--- a/yt_dlp/extractor/tvplayer.py
+++ b/yt_dlp/extractor/tvplayer.py
@@ -2,10 +2,10 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
extract_attributes,
try_get,
urlencode_postdata,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py
index e8e1fc6..9249550 100644
--- a/yt_dlp/extractor/tweakers.py
+++ b/yt_dlp/extractor/tweakers.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
determine_ext,
+ int_or_none,
mimetype2ext,
)
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index ecc8656..1a11162 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -1,10 +1,10 @@
+import functools
import json
import random
import re
from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
-from ..compat import functools # isort: split
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
@@ -34,9 +34,9 @@ from ..utils import (
class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
- _API_BASE = 'https://api.twitter.com/1.1/'
- _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
- _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
+ _API_BASE = 'https://api.x.com/1.1/'
+ _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
_LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
_flow_token = None
@@ -153,6 +153,14 @@ class TwitterBaseIE(InfoExtractor):
def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
+ # XXX: Temporary workaround until twitter.com => x.com migration is completed
+ def _real_initialize(self):
+ if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
+ return
+ # User has not yet been migrated to x.com and has passed twitter.com cookies
+ TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
+ TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
+
@functools.cached_property
def _selected_api(self):
return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
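The workaround keys off which domain issued the auth_token cookie: a fresh x.com login satisfies is_logged_in and nothing changes, while a pre-migration cookie jar carrying only twitter.com's auth_token flips both API bases back so authenticated requests keep working. Sketched against a Netscape cookies.txt (columns abbreviated):

    # .x.com        ...  auth_token  <value>   -> is_logged_in is True; x.com bases kept
    # .twitter.com  ...  auth_token  <value>   -> bases revert to api.twitter.com / twitter.com/i/api/graphql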
@@ -196,17 +204,15 @@ class TwitterBaseIE(InfoExtractor):
if self.is_logged_in:
return
- webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
- guest_token = self._search_regex(
- r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
+ guest_token = self._fetch_guest_token(None)
headers = {
**self._set_base_headers(),
'content-type': 'application/json',
'x-guest-token': guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
- 'Referer': 'https://twitter.com/',
- 'Origin': 'https://twitter.com',
+ 'Referer': 'https://x.com/',
+ 'Origin': 'https://x.com',
}
def build_login_json(*subtask_inputs):
@@ -1192,6 +1198,31 @@ class TwitterIE(TwitterBaseIE):
'_old_archive_ids': ['twitter 1724884212803834154'],
},
}, {
+ # x.com
+ 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
+ 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
+ 'info_dict': {
+ 'id': '1790637589910654976',
+ 'ext': 'mp4',
+ 'title': 'Historic Vids - One of the most intense moments in history',
+ 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
+ 'display_id': '1790637656616943991',
+ 'uploader': 'Historic Vids',
+ 'uploader_id': 'historyinmemes',
+ 'uploader_url': 'https://twitter.com/historyinmemes',
+ 'channel_id': '855481986290524160',
+ 'upload_date': '20240515',
+ 'timestamp': 1715756260.0,
+ 'duration': 15.488,
+ 'tags': [],
+ 'comment_count': int,
+ 'repost_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
+ 'age_limit': 0,
+ '_old_archive_ids': ['twitter 1790637656616943991'],
+ },
+ }, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
'only_matching': True,
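
Note: the hunks above flip the class-level API bases to x.com but fall back to the old hosts when the user supplies legacy twitter.com cookies. A minimal sketch of that fallback pattern, using hypothetical stand-ins rather than yt-dlp's real cookie jar API:

class _MigratedBase:
    # Defaults point at the new domain
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'

    def __init__(self, cookies_by_origin):
        # e.g. {'https://twitter.com/': {'auth_token': '...'}}
        self._cookies = cookies_by_origin

    def _real_initialize(self):
        if self._cookies.get('https://twitter.com/', {}).get('auth_token'):
            # Mutate the *class* so every instance shares the fallback hosts
            _MigratedBase._API_BASE = 'https://api.twitter.com/1.1/'
            _MigratedBase._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
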
diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py
index 10668ac..d5849d2 100644
--- a/yt_dlp/extractor/udn.py
+++ b/yt_dlp/extractor/udn.py
@@ -1,12 +1,12 @@
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
determine_ext,
int_or_none,
js_to_json,
)
-from ..compat import compat_urlparse
class UDNEmbedIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py
index f914613..f141804 100644
--- a/yt_dlp/extractor/ukcolumn.py
+++ b/yt_dlp/extractor/ukcolumn.py
@@ -1,11 +1,11 @@
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
from ..utils import (
+ ExtractorError,
unescapeHTML,
urljoin,
- ExtractorError,
)
-from .common import InfoExtractor
-from .vimeo import VimeoIE
-from .youtube import YoutubeIE
class UkColumnIE(InfoExtractor):
diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py
index 4316c31..1e2d118 100644
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@@ -173,6 +173,20 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
r'filemoon\.sx',
r'hentai\.animestigma\.com',
r'thisav\.com',
+ r'gounlimited\.to',
+ r'highstream\.tv',
+ r'uqload\.com',
+ r'vedbam\.xyz',
+ r'vadbam\.net',
+ r'vidlo\.us',
+ r'wolfstream\.tv',
+ r'xvideosharing\.com',
+ r'(?:\w+\.)?viidshar\.com',
+ r'sxyprn\.com',
+ r'jable\.tv',
+ r'91porn\.com',
+ r'einthusan\.(?:tv|com|ca)',
+ r'yourupload\.com',
)
_TESTS = [{
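
Note the trailing comma added after r'vadbam\.net' above: adjacent string literals in Python concatenate implicitly, so omitting it would silently fuse two domain patterns into one unmatchable regex. A quick illustration:

patterns_buggy = (
    r'vadbam\.net'  # missing comma: the two literals fuse into one
    r'vidlo\.us',
)
patterns_fixed = (
    r'vadbam\.net',
    r'vidlo\.us',
)
assert patterns_buggy == (r'vadbam\.netvidlo\.us',)
assert len(patterns_fixed) == 2
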
diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py
index 7f97fc9..928e6e1 100644
--- a/yt_dlp/extractor/urplay.py
+++ b/yt_dlp/extractor/urplay.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
- dict_get,
ExtractorError,
- int_or_none,
ISO639Utils,
+ dict_get,
+ int_or_none,
parse_age_limit,
try_get,
unified_timestamp,
diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py
index 3243f3e..42a28c5 100644
--- a/yt_dlp/extractor/usatoday.py
+++ b/yt_dlp/extractor/usatoday.py
@@ -1,4 +1,5 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
get_element_by_attribute,
@@ -6,7 +7,6 @@ from ..utils import (
try_get,
update_url_query,
)
-from ..compat import compat_str
class USATodayIE(InfoExtractor):
diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py
index 5df2416..046e3d7 100644
--- a/yt_dlp/extractor/ustream.py
+++ b/yt_dlp/extractor/ustream.py
@@ -7,10 +7,10 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
- encode_data_uri,
ExtractorError,
- int_or_none,
+ encode_data_uri,
float_or_none,
+ int_or_none,
join_nonempty,
mimetype2ext,
str_or_none,
diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py
index c3aeeb9..f6ce5b3 100644
--- a/yt_dlp/extractor/ustudio.py
+++ b/yt_dlp/extractor/ustudio.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
- unified_strdate,
unescapeHTML,
+ unified_strdate,
)
diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py
index ef44d42..205f8ea 100644
--- a/yt_dlp/extractor/veo.py
+++ b/yt_dlp/extractor/veo.py
@@ -1,5 +1,4 @@
from .common import InfoExtractor
-
from ..utils import (
int_or_none,
mimetype2ext,
diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py
index 3f2dddb..a2e9022 100644
--- a/yt_dlp/extractor/vesti.py
+++ b/yt_dlp/extractor/vesti.py
@@ -1,8 +1,8 @@
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
from .rutv import RUTVIE
+from ..utils import ExtractorError
class VestiIE(InfoExtractor):
diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py
index aa40227..7715d68 100644
--- a/yt_dlp/extractor/vevo.py
+++ b/yt_dlp/extractor/vevo.py
@@ -1,5 +1,5 @@
-import re
import json
+import re
from .common import InfoExtractor
from ..compat import compat_str
diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py
index d31908f..b072d9d 100644
--- a/yt_dlp/extractor/vice.py
+++ b/yt_dlp/extractor/vice.py
@@ -10,10 +10,10 @@ from .youtube import YoutubeIE
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
- clean_html,
ExtractorError,
- int_or_none,
OnDemandPagedList,
+ clean_html,
+ int_or_none,
parse_age_limit,
str_or_none,
try_get,
diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py
index 770aa28..6322bb0 100644
--- a/yt_dlp/extractor/vidio.py
+++ b/yt_dlp/extractor/vidio.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
format_field,
get_element_by_class,
int_or_none,
diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py
index 44353b7..e1219a8 100644
--- a/yt_dlp/extractor/vidlii.py
+++ b/yt_dlp/extractor/vidlii.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
- format_field,
float_or_none,
+ format_field,
get_element_by_id,
int_or_none,
str_to_int,
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 91b9764..ac96ade 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -1,21 +1,21 @@
import base64
import functools
-import re
import itertools
+import re
from .common import InfoExtractor
from ..compat import compat_str, compat_urlparse
from ..networking import HEADRequest, Request
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
clean_html,
determine_ext,
- ExtractorError,
get_element_by_class,
- js_to_json,
int_or_none,
+ js_to_json,
merge_dicts,
- OnDemandPagedList,
parse_filesize,
parse_iso8601,
parse_qs,
@@ -26,8 +26,8 @@ from ..utils import (
unified_timestamp,
unsmuggle_url,
urlencode_postdata,
- urljoin,
urlhandle_detect_ext,
+ urljoin,
)
diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py
index 6f9af9f..480f49b 100644
--- a/yt_dlp/extractor/viu.py
+++ b/yt_dlp/extractor/viu.py
@@ -1,8 +1,8 @@
-import re
import json
-import uuid
import random
+import re
import urllib.parse
+import uuid
from .common import InfoExtractor
from ..compat import compat_str
@@ -10,10 +10,10 @@ from ..utils import (
ExtractorError,
int_or_none,
remove_end,
+ smuggle_url,
strip_or_none,
traverse_obj,
try_get,
- smuggle_url,
unified_timestamp,
unsmuggle_url,
url_or_none,
diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py
index 7e3a3a9..9a3c75b 100644
--- a/yt_dlp/extractor/vk.py
+++ b/yt_dlp/extractor/vk.py
@@ -20,6 +20,7 @@ from ..utils import (
parse_resolution,
str_or_none,
str_to_int,
+ traverse_obj,
try_call,
unescapeHTML,
unified_timestamp,
@@ -27,7 +28,6 @@ from ..utils import (
url_or_none,
urlencode_postdata,
urljoin,
- traverse_obj,
)
@@ -451,6 +451,7 @@ class VKIE(VKBaseIE):
info_page, 'view count', default=None))
formats = []
+ subtitles = {}
for format_id, format_url in data.items():
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
@@ -462,12 +463,21 @@ class VKIE(VKBaseIE):
formats.append({
'format_id': format_id,
'url': format_url,
+ 'ext': 'mp4',
+ 'source_preference': 1,
'height': height,
})
- elif format_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
+ elif format_id.startswith('hls') and format_id != 'hls_live_playback':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False, live=is_live))
+ m3u8_id=format_id, fatal=False, live=is_live)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'):
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id=format_id, fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
elif format_id == 'rtmp':
formats.append({
'format_id': format_id,
@@ -475,7 +485,6 @@ class VKIE(VKBaseIE):
'ext': 'flv',
})
- subtitles = {}
for sub in data.get('subs') or {}:
subtitles.setdefault(sub.get('lang', 'en'), []).append({
'ext': sub.get('title', '.srt').split('.')[-1],
@@ -496,6 +505,7 @@ class VKIE(VKBaseIE):
'comment_count': int_or_none(mv_data.get('commcount')),
'is_live': is_live,
'subtitles': subtitles,
+ '_format_sort_fields': ('res', 'source'),
}
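
Note: the vk.py hunks above switch HLS and DASH extraction to the *_formats_and_subtitles helpers and fold the results into a single subtitles dict, now initialized before the loop. In spirit, the per-language merge works like this plain-Python sketch (yt-dlp's _merge_subtitles also de-duplicates entries):

def merge_subtitles(new, target):
    # Append each language's track list onto the accumulating target dict
    for lang, tracks in (new or {}).items():
        target.setdefault(lang, []).extend(tracks)

subtitles = {}
merge_subtitles({'en': [{'url': 'https://example.com/a.vtt'}]}, subtitles)  # hypothetical URLs
merge_subtitles({'en': [{'url': 'https://example.com/b.vtt'}]}, subtitles)
assert len(subtitles['en']) == 2
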
diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py
deleted file mode 100644
index ef77bed..0000000
--- a/yt_dlp/extractor/voot.py
+++ /dev/null
@@ -1,212 +0,0 @@
-import json
-import time
-import uuid
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..networking.exceptions import HTTPError
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- jwt_decode_hs256,
- parse_age_limit,
- traverse_obj,
- try_call,
- try_get,
- unified_strdate,
-)
-
-
-class VootBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'voot'
- _GEO_BYPASS = False
- _LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.'
- _TOKEN = None
- _EXPIRY = 0
- _API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'}
-
- def _perform_login(self, username, password):
- if self._TOKEN and self._EXPIRY:
- return
-
- if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
- VootBaseIE._TOKEN = password
- VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp']
- self.report_login()
-
- # Mobile number as username is not supported
- elif not username.isdigit():
- check_username = self._download_json(
- 'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({
- 'type': 'email',
- 'email': username
- }, separators=(',', ':')).encode(), headers={
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- }, note='Checking username', expected_status=403)
- if not traverse_obj(check_username, ('isExist', {bool})):
- if traverse_obj(check_username, ('status', 'code', {int})) == 9999:
- self.raise_geo_restricted(countries=['IN'])
- raise ExtractorError('Incorrect username', expected=True)
- auth_token = traverse_obj(self._download_json(
- 'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({
- 'type': 'traditional',
- 'deviceId': str(uuid.uuid4()),
- 'deviceBrand': 'PC/MAC',
- 'data': {
- 'email': username,
- 'password': password
- }
- }, separators=(',', ':')).encode(), headers={
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- }, note='Logging in', expected_status=400), ('data', 'authToken', {dict}))
- if not auth_token:
- raise ExtractorError('Incorrect password', expected=True)
- VootBaseIE._TOKEN = auth_token['accessToken']
- VootBaseIE._EXPIRY = auth_token['expirationTime']
-
- else:
- raise ExtractorError(self._LOGIN_HINT, expected=True)
-
- def _check_token_expiry(self):
- if int(time.time()) >= self._EXPIRY:
- raise ExtractorError('Access token has expired', expected=True)
-
- def _real_initialize(self):
- if not self._TOKEN:
- self.raise_login_required(self._LOGIN_HINT, method=None)
- self._check_token_expiry()
-
-
-class VootIE(VootBaseIE):
- _WORKING = False
- _VALID_URL = r'''(?x)
- (?:
- voot:|
- https?://(?:www\.)?voot\.com/?
- (?:
- movies?/[^/]+/|
- (?:shows|kids)/(?:[^/]+/){4}
- )
- )
- (?P<id>\d{3,})
- '''
- _TESTS = [{
- 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
- 'info_dict': {
- 'id': '441353',
- 'ext': 'mp4',
- 'title': 'Is this the end of Kamini?',
- 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
- 'timestamp': 1472103000,
- 'upload_date': '20160825',
- 'series': 'Ishq Ka Rang Safed',
- 'season_number': 1,
- 'episode': 'Is this the end of Kamini?',
- 'episode_number': 340,
- 'release_date': '20160825',
- 'season': 'Season 1',
- 'age_limit': 13,
- 'duration': 1146.0,
- },
- 'params': {'skip_download': 'm3u8'},
- }, {
- 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
- 'only_matching': True,
- }, {
- 'url': 'https://www.voot.com/movies/pandavas-5/424627',
- 'only_matching': True,
- }, {
- 'url': 'https://www.voot.com/movie/fight-club/621842',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- media_info = self._download_json(
- 'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id,
- query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN})
-
- try:
- m3u8_url = self._download_json(
- 'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id,
- 'Downloading playback JSON', data=b'{}', headers={
- **self.geo_verification_headers(),
- **self._API_HEADERS,
- 'Content-Type': 'application/json;charset=utf-8',
- 'platform': 'androidwebdesktop',
- 'vootid': video_id,
- 'voottoken': self._TOKEN,
- })['m3u8']
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status == 400:
- self._check_token_expiry()
- raise
-
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._remove_duplicate_formats(formats)
-
- return {
- 'id': video_id,
- # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
- 'formats': traverse_obj(formats, (
- lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
- 'http_headers': self._API_HEADERS,
- **traverse_obj(media_info, ('result', 0, {
- 'title': ('fullTitle', {str}),
- 'description': ('fullSynopsis', {str}),
- 'series': ('showName', {str}),
- 'season_number': ('season', {int_or_none}),
- 'episode': ('fullTitle', {str}),
- 'episode_number': ('episode', {int_or_none}),
- 'timestamp': ('uploadTime', {int_or_none}),
- 'release_date': ('telecastDate', {unified_strdate}),
- 'age_limit': ('ageNemonic', {parse_age_limit}),
- 'duration': ('duration', {float_or_none}),
- })),
- }
-
-
-class VootSeriesIE(VootBaseIE):
- _WORKING = False
- _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
- _TESTS = [{
- 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',
- 'playlist_mincount': 442,
- 'info_dict': {
- 'id': '100002',
- },
- }, {
- 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003',
- 'playlist_mincount': 341,
- 'info_dict': {
- 'id': '100003',
- },
- }]
- _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common'
- _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}'
-
- def _entries(self, show_id):
- show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id)
- for season in show_json.get('result', []):
- page_num = 1
- season_id = try_get(season, lambda x: x['id'], compat_str)
- season_json = self._download_json(self._SEASON_API.format(season_id, page_num),
- video_id=season_id,
- note='Downloading JSON metadata page %d' % page_num)
- episodes_json = season_json.get('result', [])
- while episodes_json:
- page_num += 1
- for episode in episodes_json:
- video_id = episode.get('id')
- yield self.url_result(
- 'voot:%s' % video_id, ie=VootIE.ie_key(), video_id=video_id)
- episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num),
- video_id=season_id,
- note='Downloading JSON metadata page %d' % page_num)['result']
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
- return self.playlist_result(self._entries(show_id), playlist_id=show_id)
diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py
index a1a9c17..3ac0f83 100644
--- a/yt_dlp/extractor/walla.py
+++ b/yt_dlp/extractor/walla.py
@@ -2,8 +2,8 @@ import re
from .common import InfoExtractor
from ..utils import (
- xpath_text,
int_or_none,
+ xpath_text,
)
diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py
index 74501b1..1cfed2d 100644
--- a/yt_dlp/extractor/washingtonpost.py
+++ b/yt_dlp/extractor/washingtonpost.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import traverse_obj
diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py
index f80f140..0b7ddd2 100644
--- a/yt_dlp/extractor/wdr.py
+++ b/yt_dlp/extractor/wdr.py
@@ -6,16 +6,16 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
+ ExtractorError,
determine_ext,
dict_get,
- ExtractorError,
js_to_json,
strip_jsonp,
try_get,
unified_strdate,
update_url_query,
- urlhandle_detect_ext,
url_or_none,
+ urlhandle_detect_ext,
)
diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py
index 2fca745..b6a6593 100644
--- a/yt_dlp/extractor/weibo.py
+++ b/yt_dlp/extractor/weibo.py
@@ -1,6 +1,6 @@
+import itertools
import json
import random
-import itertools
import urllib.parse
from .common import InfoExtractor
diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py
index f2808cd..492891d 100644
--- a/yt_dlp/extractor/whowatch.py
+++ b/yt_dlp/extractor/whowatch.py
@@ -1,12 +1,12 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ ExtractorError,
int_or_none,
qualities,
try_call,
try_get,
- ExtractorError,
)
-from ..compat import compat_str
class WhoWatchIE(InfoExtractor):
diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py
index f9bf092..d7d77c0 100644
--- a/yt_dlp/extractor/wimtv.py
+++ b/yt_dlp/extractor/wimtv.py
@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
determine_ext,
parse_duration,
urlencode_postdata,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py
index 5e590e2..0ef4e8e 100644
--- a/yt_dlp/extractor/wppilot.py
+++ b/yt_dlp/extractor/wppilot.py
@@ -1,13 +1,13 @@
+import json
+import random
+import re
+
from .common import InfoExtractor
from ..utils import (
- try_get,
ExtractorError,
+ try_get,
)
-import json
-import random
-import re
-
class WPPilotBaseIE(InfoExtractor):
_VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py
index 145246a..d401d6d 100644
--- a/yt_dlp/extractor/wrestleuniverse.py
+++ b/yt_dlp/extractor/wrestleuniverse.py
@@ -12,6 +12,7 @@ from ..utils import (
jwt_decode_hs256,
traverse_obj,
try_call,
+ url_basename,
url_or_none,
urlencode_postdata,
variadic,
@@ -147,7 +148,7 @@ class WrestleUniverseBaseIE(InfoExtractor):
metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
if not metadata:
webpage = self._download_webpage(url, video_id)
- nextjs_data = self._search_nextjs_data(webpage, video_id)
+ nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
metadata = traverse_obj(nextjs_data, (
'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
return metadata
@@ -194,8 +195,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):
return {
'id': video_id,
- 'formats': self._get_formats(video_data, (
- (('protocolHls', 'url'), ('chromecastUrls', ...)), {url_or_none}), video_id),
+ 'formats': self._get_formats(video_data, ('protocolHls', 'url', {url_or_none}), video_id),
**traverse_obj(metadata, {
'title': ('displayName', {str}),
'description': ('description', {str}),
@@ -259,6 +259,10 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
'params': {
'skip_download': 'm3u8',
},
+ }, {
+ 'note': 'manifest provides live-a (partial) and live-b (full) streams',
+ 'url': 'https://www.wrestle-universe.com/en/lives/umc99R9XsexXrxr9VjTo9g',
+ 'only_matching': True,
}]
_API_PATH = 'events'
@@ -285,12 +289,16 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
video_data, decrypt = self._call_encrypted_api(
video_id, ':watchArchive', 'watch archive', data={'method': 1})
- info['formats'] = self._get_formats(video_data, (
- ('hls', None), ('urls', 'chromecastUrls'), ..., {url_or_none}), video_id)
+ # 'chromecastUrls' may point to partial videos, so avoid them
+ info['formats'] = self._get_formats(video_data, ('hls', (('urls', ...), 'url'), {url_or_none}), video_id)
for f in info['formats']:
# bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
if f.get('tbr'):
f['tbr'] = int(f['tbr'] / 2.5)
+ # prefer variants with the same basename as the master playlist to avoid partial streams
+ f['format_id'] = url_basename(f['url']).partition('.')[0]
+ if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
+ f['preference'] = -10
hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
if hls_aes_key:
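
Note: the PPV hunk above prefers HLS variants whose playlist basename matches the master playlist's, demoting the partial live-a streams mentioned in the new test note. Reduced to plain Python (url_basename returns the last path component):

def stem(url):
    # last path component without extension: '.../live-b_1080.m3u8' -> 'live-b_1080'
    return url.rsplit('/', 1)[-1].partition('.')[0]

master = 'https://cdn.example.com/hls/live-b.m3u8'       # hypothetical URLs
variant = 'https://cdn.example.com/hls/live-a_720.m3u8'
preference = 0 if stem(variant).startswith(stem(master)) else -10
assert preference == -10  # live-a variants sort below live-b ones
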
diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py
index 86e2646..35fe303 100644
--- a/yt_dlp/extractor/wsj.py
+++ b/yt_dlp/extractor/wsj.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
unified_strdate,
)
diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py
deleted file mode 100644
index 08c6d6c..0000000
--- a/yt_dlp/extractor/xfileshare.py
+++ /dev/null
@@ -1,198 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- decode_packed_codes,
- determine_ext,
- int_or_none,
- js_to_json,
- urlencode_postdata,
-)
-
-
-# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
-def aa_decode(aa_code):
- symbol_table = [
- ('7', '((゚ー゚) + (o^_^o))'),
- ('6', '((o^_^o) +(o^_^o))'),
- ('5', '((゚ー゚) + (゚Θ゚))'),
- ('2', '((o^_^o) - (゚Θ゚))'),
- ('4', '(゚ー゚)'),
- ('3', '(o^_^o)'),
- ('1', '(゚Θ゚)'),
- ('0', '(c^_^o)'),
- ]
- delim = '(゚Д゚)[゚ε゚]+'
- ret = ''
- for aa_char in aa_code.split(delim):
- for val, pat in symbol_table:
- aa_char = aa_char.replace(pat, val)
- aa_char = aa_char.replace('+ ', '')
- m = re.match(r'^\d+', aa_char)
- if m:
- ret += chr(int(m.group(0), 8))
- else:
- m = re.match(r'^u([\da-f]+)', aa_char)
- if m:
- ret += chr(int(m.group(1), 16))
- return ret
-
-
-class XFileShareIE(InfoExtractor):
- _SITES = (
- (r'aparat\.cam', 'Aparat'),
- (r'clipwatching\.com', 'ClipWatching'),
- (r'gounlimited\.to', 'GoUnlimited'),
- (r'govid\.me', 'GoVid'),
- (r'holavid\.com', 'HolaVid'),
- (r'streamty\.com', 'Streamty'),
- (r'thevideobee\.to', 'TheVideoBee'),
- (r'uqload\.com', 'Uqload'),
- (r'vidbom\.com', 'VidBom'),
- (r'vidlo\.us', 'vidlo'),
- (r'vidlocker\.xyz', 'VidLocker'),
- (r'vidshare\.tv', 'VidShare'),
- (r'vup\.to', 'VUp'),
- (r'wolfstream\.tv', 'WolfStream'),
- (r'xvideosharing\.com', 'XVideoSharing'),
- )
-
- IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
- _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
- % '|'.join(site for site in list(zip(*_SITES))[0]))
- _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
-
- _FILE_NOT_FOUND_REGEXES = (
- r'>(?:404 - )?File Not Found<',
- r'>The file was removed by administrator<',
- )
-
- _TESTS = [{
- 'url': 'https://uqload.com/dltx1wztngdz',
- 'md5': '3cfbb65e4c90e93d7b37bcb65a595557',
- 'info_dict': {
- 'id': 'dltx1wztngdz',
- 'ext': 'mp4',
- 'title': 'Rick Astley Never Gonna Give You mp4',
- 'thumbnail': r're:https://.*\.jpg'
- }
- }, {
- 'url': 'http://xvideosharing.com/fq65f94nd2ve',
- 'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
- 'info_dict': {
- 'id': 'fq65f94nd2ve',
- 'ext': 'mp4',
- 'title': 'sample',
- 'thumbnail': r're:http://.*\.jpg',
- },
- }, {
- 'url': 'https://aparat.cam/n4d6dh0wvlpr',
- 'only_matching': True,
- }, {
- 'url': 'https://wolfstream.tv/nthme29v9u2x',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- host, video_id = self._match_valid_url(url).groups()
-
- url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
- webpage = self._download_webpage(url, video_id)
-
- if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- fields = self._hidden_inputs(webpage)
-
- if fields.get('op') == 'download1':
- countdown = int_or_none(self._search_regex(
- r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
- webpage, 'countdown', default=None))
- if countdown:
- self._sleep(countdown, video_id)
-
- webpage = self._download_webpage(
- url, video_id, 'Downloading video page',
- data=urlencode_postdata(fields), headers={
- 'Referer': url,
- 'Content-type': 'application/x-www-form-urlencoded',
- })
-
- title = (self._search_regex(
- (r'style="z-index: [0-9]+;">([^<]+)</span>',
- r'<td nowrap>([^<]+)</td>',
- r'h4-fine[^>]*>([^<]+)<',
- r'>Watch (.+)[ <]',
- r'<h2 class="video-page-head">([^<]+)</h2>',
- r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
- r'title\s*:\s*"([^"]+)"'), # govid.me
- webpage, 'title', default=None) or self._og_search_title(
- webpage, default=None) or video_id).strip()
-
- for regex, func in (
- (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
- (r'(゚.+)', aa_decode)):
- obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
- if obf_code:
- webpage = webpage.replace(obf_code, func(obf_code))
-
- formats = []
-
- jwplayer_data = self._search_regex(
- [
- r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
- r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
- ], webpage,
- 'jwplayer data', default=None)
- if jwplayer_data:
- jwplayer_data = self._parse_json(
- jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
- if jwplayer_data:
- formats = self._parse_jwplayer_data(
- jwplayer_data, video_id, False,
- m3u8_id='hls', mpd_id='dash')['formats']
-
- if not formats:
- urls = []
- for regex in (
- r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
- r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
- r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
- r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
- for mobj in re.finditer(regex, webpage):
- video_url = mobj.group('url')
- if video_url not in urls:
- urls.append(video_url)
-
- sources = self._search_regex(
- r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
- if sources:
- urls.extend(self._parse_json(sources, video_id))
-
- formats = []
- for video_url in urls:
- if determine_ext(video_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': 'sd',
- })
-
- thumbnail = self._search_regex(
- [
- r'<video[^>]+poster="([^"]+)"',
- r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
- ], webpage, 'thumbnail', default=None)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- 'http_headers': {'Referer': url}
- }
diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py
index 01ac5dd..0b3a620 100644
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -4,11 +4,11 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
dict_get,
extract_attributes,
- ExtractorError,
float_or_none,
int_or_none,
parse_duration,
diff --git a/yt_dlp/extractor/xiaohongshu.py b/yt_dlp/extractor/xiaohongshu.py
new file mode 100644
index 0000000..faad9d9
--- /dev/null
+++ b/yt_dlp/extractor/xiaohongshu.py
@@ -0,0 +1,83 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class XiaoHongShuIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.xiaohongshu\.com/explore/(?P<id>[\da-f]+)'
+ IE_DESC = '小红书'
+ _TESTS = [{
+ 'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9',
+ 'md5': '2a87a77ddbedcaeeda8d7eae61b61228',
+ 'info_dict': {
+ 'id': '6411cf99000000001300b6d9',
+ 'ext': 'mp4',
+ 'uploader_id': '5c31698d0000000007018a31',
+ 'description': '#今日快乐今日发[话题]# #吃货薯看这里[话题]# #香妃蛋糕[话题]# #小五卷蛋糕[话题]# #新手蛋糕卷[话题]#',
+ 'title': '香妃蛋糕也太香了吧🔥不需要卷❗️绝对的友好',
+ 'tags': ['今日快乐今日发', '吃货薯看这里', '香妃蛋糕', '小五卷蛋糕', '新手蛋糕卷'],
+ 'duration': 101.726,
+ 'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ initial_state = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json)
+
+ note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note'))
+ video_info = traverse_obj(note_info, ('video', 'media', 'stream', ('h264', 'av1', 'h265'), ...))
+
+ formats = []
+ for info in video_info:
+ format_info = traverse_obj(info, {
+ 'fps': ('fps', {int_or_none}),
+ 'width': ('width', {int_or_none}),
+ 'height': ('height', {int_or_none}),
+ 'vcodec': ('videoCodec', {str}),
+ 'acodec': ('audioCodec', {str}),
+ 'abr': ('audioBitrate', {int_or_none}),
+ 'vbr': ('videoBitrate', {int_or_none}),
+ 'audio_channels': ('audioChannels', {int_or_none}),
+ 'tbr': ('avgBitrate', {int_or_none}),
+ 'format': ('qualityType', {str}),
+ 'filesize': ('size', {int_or_none}),
+ 'duration': ('duration', {functools.partial(float_or_none, scale=1000)})
+ })
+
+ formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), {
+ lambda u: url_or_none(u) and {'url': u, **format_info}})))
+
+ thumbnails = []
+ for image_info in traverse_obj(note_info, ('imageList', ...)):
+ thumbnail_info = traverse_obj(image_info, {
+ 'height': ('height', {int_or_none}),
+ 'width': ('width', {int_or_none}),
+ })
+ for thumb_url in traverse_obj(image_info, (('urlDefault', 'urlPre'), {url_or_none})):
+ thumbnails.append({
+ 'url': thumb_url,
+ **thumbnail_info,
+ })
+
+ return {
+ 'id': display_id,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'title': self._html_search_meta(['og:title'], webpage, default=None),
+ **traverse_obj(note_info, {
+ 'title': ('title', {str}),
+ 'description': ('desc', {str}),
+ 'tags': ('tagList', ..., 'name', {str}),
+ 'uploader_id': ('user', 'userId', {str}),
+ }),
+ }
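
Note: in the new extractor above, each stream entry contributes its primary mediaUrl plus any backupUrls, all sharing the same mapped format_info. A plain-dict equivalent of that traverse_obj expression, assuming the note JSON shape the code implies:

def stream_formats(info, format_info):
    # primary mediaUrl first, then any backupUrls, each tagged with shared metadata
    urls = [info.get('mediaUrl'), *(info.get('backupUrls') or [])]
    return [{'url': u, **format_info} for u in urls if u]

assert stream_formats(
    {'mediaUrl': 'https://sns-video.example/a.mp4',      # hypothetical URLs
     'backupUrls': ['https://sns-video.example/b.mp4']},
    {'vcodec': 'h264'},
) == [
    {'url': 'https://sns-video.example/a.mp4', 'vcodec': 'h264'},
    {'url': 'https://sns-video.example/b.mp4', 'vcodec': 'h264'},
]
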
diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py
index 1452aae..74d4f04 100644
--- a/yt_dlp/extractor/xnxx.py
+++ b/yt_dlp/extractor/xnxx.py
@@ -2,9 +2,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ NO_DEFAULT,
determine_ext,
int_or_none,
- NO_DEFAULT,
str_to_int,
)
diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py
index 8dd1cd9..322e865 100644
--- a/yt_dlp/extractor/xstream.py
+++ b/yt_dlp/extractor/xstream.py
@@ -2,11 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
+ find_xpath_attr,
int_or_none,
parse_iso8601,
- xpath_with_ns,
xpath_text,
- find_xpath_attr,
+ xpath_with_ns,
)
diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py
index 59eef84..6b16ac2 100644
--- a/yt_dlp/extractor/xvideos.py
+++ b/yt_dlp/extractor/xvideos.py
@@ -3,9 +3,9 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
int_or_none,
parse_duration,
)
@@ -173,8 +173,41 @@ class XVideosIE(InfoExtractor):
class XVideosQuickiesIE(InfoExtractor):
IE_NAME = 'xvideos:quickies'
- _VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?P<domain>(?:[^/?#]+\.)?xvideos2?\.com)/(?:profiles/|amateur-channels/)?[^/?#]+#quickies/a/(?P<id>\w+)'
_TESTS = [{
+ 'url': 'https://www.xvideos.com/lili_love#quickies/a/ipdtikh1a4c',
+ 'md5': 'f9e4f518ff1de14b99a400bbd0fc5ee0',
+ 'info_dict': {
+ 'id': 'ipdtikh1a4c',
+ 'ext': 'mp4',
+ 'title': 'Mexican chichóna putisima',
+ 'age_limit': 18,
+ 'duration': 81,
+ 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
+ 'url': 'https://www.xvideos.com/profiles/lili_love#quickies/a/ipphaob6fd1',
+ 'md5': '5340938aac6b46e19ebdd1d84535862e',
+ 'info_dict': {
+ 'id': 'ipphaob6fd1',
+ 'ext': 'mp4',
+ 'title': 'Puta chichona mexicana squirting',
+ 'age_limit': 18,
+ 'duration': 56,
+ 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
+ 'url': 'https://www.xvideos.com/amateur-channels/lili_love#quickies/a/hfmffmd7661',
+ 'md5': '92428518bbabcb4c513e55922e022491',
+ 'info_dict': {
+ 'id': 'hfmffmd7661',
+ 'ext': 'mp4',
+ 'title': 'Chichona mexican slut',
+ 'age_limit': 18,
+ 'duration': 9,
+ 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
+ }
+ }, {
'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
'md5': '16e322a93282667f1963915568f782c1',
'info_dict': {
@@ -189,4 +222,4 @@ class XVideosQuickiesIE(InfoExtractor):
def _real_extract(self, url):
domain, id_ = self._match_valid_url(url).group('domain', 'id')
- return self.url_result(f'https://{domain}/video{id_}/_', XVideosIE, id_)
+ return self.url_result(f'https://{domain}/video{"" if id_.isdecimal() else "."}{id_}/_', XVideosIE, id_)
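
Note: the rewritten URL template above keeps the legacy /video<id>/ path for numeric quickie IDs, while the newer slug IDs get a dot separator. As a standalone function:

def quickie_url(domain, video_id):
    sep = '' if video_id.isdecimal() else '.'
    return f'https://{domain}/video{sep}{video_id}/_'

assert quickie_url('www.xvideos.com', '47258683') == 'https://www.xvideos.com/video47258683/_'
assert quickie_url('www.xvideos.com', 'hfmffmd7661') == 'https://www.xvideos.com/video.hfmffmd7661/_'
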
diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py
index e3e3a9f..aa6c84d 100644
--- a/yt_dlp/extractor/xxxymovies.py
+++ b/yt_dlp/extractor/xxxymovies.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
- parse_duration,
int_or_none,
+ parse_duration,
)
diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py
index 794dc3e..acfe69b 100644
--- a/yt_dlp/extractor/yandexmusic.py
+++ b/yt_dlp/extractor/yandexmusic.py
@@ -5,8 +5,8 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
- int_or_none,
float_or_none,
+ int_or_none,
try_get,
)
diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py
index 4382a56..95a9446 100644
--- a/yt_dlp/extractor/yandexvideo.py
+++ b/yt_dlp/extractor/yandexvideo.py
@@ -259,15 +259,15 @@ class ZenYandexIE(InfoExtractor):
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
data_json = self._search_json(
r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
- serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)',
- webpage, 'server state').replace('State', 'Settings')
+ serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
webpage, 'uploader', default='<a>')
uploader_name = extract_attributes(uploader).get('aria-label')
- video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict)
- stream_urls = try_get(video_json, lambda x: x['video']['streams'])
+ item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
+ video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
+
formats, subtitles = [], {}
- for s_url in stream_urls:
+ for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
ext = determine_ext(s_url)
if ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
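
Note: the Zen hunk above stops rewriting the serverState key and instead resolves the currently open item through videoViewer. A plain-dict walk equivalent to the new traverse_obj lookups, assuming the blob shape the code implies:

def opened_video(data_json, serverstate):
    # serverState -> videoViewer -> items[openedItemId], with None-safe fallbacks
    viewer = (data_json.get(serverstate) or {}).get('videoViewer') or {}
    return (viewer.get('items') or {}).get(viewer.get('openedItemId')) or {}
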
diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py
index 6ee0abc..0e047aa 100644
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -1,19 +1,27 @@
+import itertools
import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ clean_html,
extract_attributes,
+ get_element_by_class,
+ get_element_by_id,
+ get_elements_html_by_class,
int_or_none,
merge_dicts,
- str_to_int,
+ parse_count,
+ parse_qs,
traverse_obj,
unified_strdate,
url_or_none,
+ urljoin,
)
class YouPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?/?(?:[#?]|$)'
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
_TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
@@ -34,7 +42,7 @@ class YouPornIE(InfoExtractor):
'tags': list,
'age_limit': 18,
},
- 'skip': 'This video has been disabled',
+ 'skip': 'This video has been deactivated',
}, {
# Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@@ -72,15 +80,14 @@ class YouPornIE(InfoExtractor):
'id': '16290308',
'age_limit': 18,
'categories': [],
- 'description': 'md5:00ea70f642f431c379763c17c2f396bc',
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
- 'thumbnail': 'https://fi1.ypncdn.com/202011/23/16290308/original/8/tinderspecial-trailer1-8(m=eaAaaEPbaaaa).jpg',
- 'timestamp': 1606089600,
+ 'thumbnail': r're:https://.+\.jpg',
+ 'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
@@ -88,11 +95,27 @@ class YouPornIE(InfoExtractor):
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
- definitions = self._download_json(
- f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
+ self._set_cookie('.youporn.com', 'age_verified', '1')
+ webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id)
- def get_format_data(data, f):
- return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
+ watchable = self._search_regex(
+ r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
+ webpage, 'watchability', default=None)
+ if not watchable:
+ msg = re.split(r'\s{2}', clean_html(get_element_by_id('mainContent', webpage)) or '')[0]
+ raise ExtractorError(
+ f'{self.IE_NAME} says: {msg}' if msg else 'Video unavailable', expected=True)
+
+ player_vars = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)
+ definitions = player_vars['mediaDefinitions']
+
+ def get_format_data(data, stream_type):
+ info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
+ if not info_url:
+ return []
+ return traverse_obj(
+ self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
+ lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
formats = []
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
@@ -123,10 +146,6 @@ class YouPornIE(InfoExtractor):
f['height'] = height
formats.append(f)
- webpage = self._download_webpage(
- 'http://www.youporn.com/watch/%s' % video_id, display_id,
- headers={'Cookie': 'age_verified=1'})
-
title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title(
@@ -141,8 +160,10 @@ class YouPornIE(InfoExtractor):
thumbnail = self._search_regex(
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
webpage, 'thumbnail', fatal=False, group='thumbnail')
- duration = int_or_none(self._html_search_meta(
- 'video:duration', webpage, 'duration', fatal=False))
+ duration = traverse_obj(player_vars, ('duration', {int_or_none}))
+ if duration is None:
+ duration = int_or_none(self._html_search_meta(
+ 'video:duration', webpage, 'duration', fatal=False))
uploader = self._html_search_regex(
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
@@ -158,11 +179,11 @@ class YouPornIE(InfoExtractor):
view_count = None
views = self._search_regex(
- r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
- 'views', default=None)
+ r'(<div [^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
+ webpage, 'views', default=None)
if views:
- view_count = str_to_int(extract_attributes(views).get('data-value'))
- comment_count = str_to_int(self._search_regex(
+ view_count = parse_count(extract_attributes(views).get('data-value'))
+ comment_count = parse_count(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', default=None))
@@ -180,7 +201,8 @@ class YouPornIE(InfoExtractor):
data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
data.pop('url', None)
- return merge_dicts(data, {
+
+ result = merge_dicts(data, {
'id': video_id,
'display_id': display_id,
'title': title,
@@ -196,3 +218,350 @@ class YouPornIE(InfoExtractor):
'age_limit': age_limit,
'formats': formats,
})
+
+ # Remove SEO spam "description"
+ description = result.get('description')
+ if description and description.startswith(f'Watch {result.get("title")} online'):
+ del result['description']
+
+ return result
+
+
+class YouPornListBase(InfoExtractor):
+ def _get_next_url(self, url, pl_id, html):
+ return urljoin(url, self._search_regex(
+ r'''<a [^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
+ get_element_by_id('next', html) or '', 'next page',
+ group='url', default=None))
+
+ @classmethod
+ def _get_title_from_slug(cls, title_slug):
+ return re.sub(r'[_-]', ' ', title_slug)
+
+ def _entries(self, url, pl_id, html=None, page_num=None):
+ start = page_num or 1
+ for page in itertools.count(start):
+ if not html:
+ html = self._download_webpage(
+ url, pl_id, note=f'Downloading page {page}', fatal=page == start)
+ if not html:
+ return
+ for element in get_elements_html_by_class('video-title', html):
+ if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})):
+ yield self.url_result(video_url)
+
+ if page_num is not None:
+ return
+ next_url = self._get_next_url(url, pl_id, html)
+ if not next_url or next_url == url:
+ return
+ url = next_url
+ html = None
+
+ def _real_extract(self, url, html=None):
+ m_dict = self._match_valid_url(url).groupdict()
+ pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
+ qs = {k: v[-1] for k, v in parse_qs(url).items() if v}
+
+ base_id = pl_id or 'YouPorn'
+ title = self._get_title_from_slug(base_id)
+ if page_type:
+ title = f'{page_type.capitalize()} {title}'
+ base_id = [base_id.lower()]
+ if sort is None:
+ title += ' videos'
+ else:
+ title = f'{title} videos by {re.sub(r"[_-]", " ", sort)}'
+ base_id.append(sort)
+ if qs:
+ filters = list(map('='.join, sorted(qs.items())))
+ title += f' ({",".join(filters)})'
+ base_id.extend(filters)
+ pl_id = '/'.join(base_id)
+
+ return self.playlist_result(
+ self._entries(url, pl_id, html=html, page_num=int_or_none(qs.get('page'))),
+ playlist_id=pl_id, playlist_title=title)
+
+
+class YouPornCategoryIE(YouPornListBase):
+ IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>category)/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/category/popular-with-women/popular/',
+ 'info_dict': {
+ 'id': 'popular-with-women/popular',
+ 'title': 'Category popular with women videos by popular',
+ },
+ 'playlist_mincount': 39,
+ }, {
+ 'note': 'Filtered paginated list with single page result',
+ 'url': 'https://www.youporn.com/category/popular-with-women/duration/?min_minutes=10',
+ 'info_dict': {
+ 'id': 'popular-with-women/duration/min_minutes=10',
+ 'title': 'Category popular with women videos by duration (min_minutes=10)',
+ },
+ 'playlist_mincount': 2,
+ # 'playlist_maxcount': 30,
+ }, {
+ 'note': 'Single page of full list',
+ 'url': 'https://www.youporn.com/category/popular-with-women/popular?page=1',
+ 'info_dict': {
+ 'id': 'popular-with-women/popular/page=1',
+ 'title': 'Category popular with women videos by popular (page=1)',
+ },
+ 'playlist_count': 36,
+ }]
+
+
+class YouPornChannelIE(YouPornListBase):
+ IE_DESC = 'YouPorn channel, with sorting and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>channel)/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/channel/x-feeds/',
+ 'info_dict': {
+ 'id': 'x-feeds',
+ 'title': 'Channel X-Feeds videos',
+ },
+ 'playlist_mincount': 37,
+ }, {
+ 'note': 'Single page of full list (no filters here)',
+ 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
+ 'info_dict': {
+ 'id': 'x-feeds/duration/page=1',
+ 'title': 'Channel X-Feeds videos by duration (page=1)',
+ },
+ 'playlist_count': 24,
+ }]
+
+ @staticmethod
+ def _get_title_from_slug(title_slug):
+ return re.sub(r'_', ' ', title_slug).title()
+
+
+class YouPornCollectionIE(YouPornListBase):
+ IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>collection)s/videos/(?P<id>\d+)
+ (?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/collections/videos/33044251/',
+ 'info_dict': {
+ 'id': '33044251',
+ 'title': 'Collection Sexy Lips videos',
+ 'uploader': 'ph-littlewillyb',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'note': 'Single page of full list (no filters here)',
+ 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
+ 'info_dict': {
+ 'id': '33044251/time/page=1',
+ 'title': 'Collection Sexy Lips videos by time (page=1)',
+ 'uploader': 'ph-littlewillyb',
+ },
+ 'playlist_count': 20,
+ }]
+
+ def _real_extract(self, url):
+ pl_id = self._match_id(url)
+ html = self._download_webpage(url, pl_id)
+ playlist = super()._real_extract(url, html=html)
+ infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
+ 'collection-infos', html)) or '')
+ title, uploader = self._search_regex(
+ r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
+ infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
+ if title:
+ playlist.update({
+ 'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
+ 'uploader': uploader,
+ })
+
+ return playlist
+
+
+class YouPornTagIE(YouPornListBase):
+ IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ porn(?P<type>tag)s/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/porntags/austrian',
+ 'info_dict': {
+ 'id': 'austrian',
+ 'title': 'Tag austrian videos',
+ },
+ 'playlist_mincount': 33,
+ 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
+ }, {
+ 'note': 'Filtered paginated list with single page result',
+ 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
+ 'info_dict': {
+ 'id': 'austrian/duration/min_minutes=10',
+ 'title': 'Tag austrian videos by duration (min_minutes=10)',
+ },
+ 'playlist_mincount': 10,
+ # The number of videos per page is (rows x cols) 2x3 + 6x4 + 2, or + 3,
+ # or more, varying with the number of ads; assume a maximum of 9x4.
+ # NB: column 1 may not be shown in the non-JS page with site CSS at 100% zoom.
+ # 'playlist_maxcount': 32,
+ 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
+ }, {
+ 'note': 'Single page of full list',
+ 'url': 'https://www.youporn.com/porntags/austrian/?page=1',
+ 'info_dict': {
+ 'id': 'austrian/page=1',
+ 'title': 'Tag austrian videos (page=1)',
+ },
+ 'playlist_mincount': 32,
+ # 'playlist_maxcount': 34,
+ 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
+ }]
+
+ def _real_extract(self, url):
+ self.report_warning(
+ 'YouPorn tag pages are not correctly cached and '
+ 'often return incorrect results', only_once=True)
+ return super()._real_extract(url)
+
+
+class YouPornStarIE(YouPornListBase):
+ IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?P<type>pornstar)/(?P<id>[^/?#&]+)
+ (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination',
+ 'url': 'https://www.youporn.com/pornstar/daynia/',
+ 'info_dict': {
+ 'id': 'daynia',
+ 'title': 'Pornstar Daynia videos',
+ 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
+ },
+ 'playlist_mincount': 40,
+ }, {
+ 'note': 'Single page of full list (no filters here)',
+ 'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
+ 'info_dict': {
+ 'id': 'daynia/page=1',
+ 'title': 'Pornstar Daynia videos (page=1)',
+ 'description': 're:.{180,}',
+ },
+ 'playlist_count': 26,
+ }]
+
+ @staticmethod
+ def _get_title_from_slug(title_slug):
+ return re.sub(r'_', ' ', title_slug).title()
+
+ def _real_extract(self, url):
+ pl_id = self._match_id(url)
+ html = self._download_webpage(url, pl_id)
+ playlist = super()._real_extract(url, html=html)
+ INFO_ELEMENT_RE = r'''(?x)
+ <div [^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
+ (?P<info>[\s\S]+?)(?:</div>\s*){6,}
+ '''
+
+ if infos := self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default=''):
+ infos = re.sub(
+ r'(?:\s*nl=nl)+\s*', ' ',
+ re.sub(r'(?u)\s+', ' ', clean_html(re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
+
+ return {
+ **playlist,
+ 'description': infos.strip() or None,
+ }
+
+
+class YouPornVideosIE(YouPornListBase):
+ IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?youporn\.com/
+ (?:(?P<id>browse)/)?
+ (?P<sort>(?(id)
+ (?:duration|rating|time|views)|
+ (?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
+ (?:[/#?]|$)
+ '''
+ _TESTS = [{
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/',
+ 'info_dict': {
+ 'id': 'youporn',
+ 'title': 'YouPorn videos',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/recommended',
+ 'info_dict': {
+ 'id': 'youporn/recommended',
+ 'title': 'YouPorn videos by recommended',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/top_rated',
+ 'info_dict': {
+ 'id': 'youporn/top_rated',
+ 'title': 'YouPorn videos by top rated',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Full list with pagination (too long for test)',
+ 'url': 'https://www.youporn.com/browse/time',
+ 'info_dict': {
+ 'id': 'browse/time',
+ 'title': 'YouPorn videos by time',
+ },
+ 'only_matching': True,
+ }, {
+ 'note': 'Filtered paginated list with single page result',
+ 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
+ 'info_dict': {
+ 'id': 'youporn/most_favorited/max_minutes=2/res=VR',
+ 'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
+ },
+ 'playlist_mincount': 10,
+ # 'playlist_maxcount': 28,
+ }, {
+ 'note': 'Filtered paginated list with several pages',
+ 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
+ 'info_dict': {
+ 'id': 'youporn/most_favorited/max_minutes=5/res=VR',
+ 'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
+ },
+ 'playlist_mincount': 45,
+ }, {
+ 'note': 'Single page of full list',
+ 'url': 'https://www.youporn.com/browse/time?page=1',
+ 'info_dict': {
+ 'id': 'browse/time/page=1',
+ 'title': 'YouPorn videos by time (page=1)',
+ },
+ 'playlist_count': 36,
+ }]
+
+ @staticmethod
+ def _get_title_from_slug(title_slug):
+ return 'YouPorn' if title_slug == 'browse' else title_slug
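
Note: the list extractors above all share _entries, which pages through results until the "next" link disappears or points back at the current page. Its control flow, reduced to a generic generator (the callables here are hypothetical stand-ins):

def paginate(first_url, fetch_page, extract_items, next_url_of):
    url = first_url
    while url:
        page = fetch_page(url)
        yield from extract_items(page)
        nxt = next_url_of(page)
        if not nxt or nxt == url:  # missing or self-linking "next" ends the walk
            break
        url = nxt
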
diff --git a/yt_dlp/extractor/yourporn.py b/yt_dlp/extractor/yourporn.py
deleted file mode 100644
index 38f42a9..0000000
--- a/yt_dlp/extractor/yourporn.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- parse_duration,
- urljoin,
-)
-
-
-class YourPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
- _TESTS = [{
- 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
- 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
- 'info_dict': {
- 'id': '57ffcb2e1179b',
- 'ext': 'mp4',
- 'title': 'md5:c9f43630bd968267672651ba905a7d35',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 165,
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- parts = self._parse_json(
- self._search_regex(
- r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
- group='data'),
- video_id)[video_id].split('/')
-
- num = 0
- for c in parts[6] + parts[7]:
- if c.isnumeric():
- num += int(c)
- parts[5] = compat_str(int(parts[5]) - num)
- parts[1] += '8'
- video_url = urljoin(url, '/'.join(parts))
-
- title = (self._search_regex(
- r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
- default=None) or self._og_search_description(webpage)).strip()
- thumbnail = self._og_search_thumbnail(webpage)
- duration = parse_duration(self._search_regex(
- r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
- default=None))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'age_limit': 18,
- 'ext': 'mp4',
- }
diff --git a/yt_dlp/extractor/yourupload.py b/yt_dlp/extractor/yourupload.py
deleted file mode 100644
index def6329..0000000
--- a/yt_dlp/extractor/yourupload.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from .common import InfoExtractor
-from ..utils import urljoin
-
-
-class YourUploadIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:yourupload\.com/(?:watch|embed)|embed\.yourupload\.com)/(?P<id>[A-Za-z0-9]+)'
- _TESTS = [{
- 'url': 'http://yourupload.com/watch/14i14h',
- 'md5': '5e2c63385454c557f97c4c4131a393cd',
- 'info_dict': {
- 'id': '14i14h',
- 'ext': 'mp4',
- 'title': 'BigBuckBunny_320x180.mp4',
- 'thumbnail': r're:^https?://.*\.jpe?g',
- }
- }, {
- 'url': 'http://www.yourupload.com/embed/14i14h',
- 'only_matching': True,
- }, {
- 'url': 'http://embed.yourupload.com/14i14h',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- embed_url = 'http://www.yourupload.com/embed/%s' % video_id
-
- webpage = self._download_webpage(embed_url, video_id)
-
- title = self._og_search_title(webpage)
- video_url = urljoin(embed_url, self._og_search_video_url(webpage))
- thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'http_headers': {
- 'Referer': embed_url,
- },
- }
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index e553fff..54da4e3 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -240,6 +240,16 @@ INNERTUBE_CLIENTS = {
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
},
+ # This client has pre-merged video+audio 720p/1080p streams
+ 'mediaconnect': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'MEDIA_CONNECT_FRONTEND',
+ 'clientVersion': '0.1',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 95
+ },
}
@@ -1171,7 +1181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
- _formats = {
+ _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
@@ -1315,6 +1325,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
+ 'timestamp': 1349198244,
}
},
{
@@ -1358,6 +1369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
+ 'timestamp': 1349198244,
},
'params': {
'skip_download': True,
@@ -1444,6 +1456,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1401991663,
},
},
{
@@ -1503,6 +1516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Projekt Melody',
'uploader_url': 'https://www.youtube.com/@ProjektMelody',
'uploader_id': '@ProjektMelody',
+ 'timestamp': 1577508724,
},
},
{
@@ -1608,6 +1622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@Olympics',
'uploader_id': '@Olympics',
'channel_is_verified': True,
+ 'timestamp': 1440707674,
},
'params': {
'skip_download': 'requires avconv',
@@ -1641,6 +1656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': '孫ᄋᄅ',
'uploader_url': 'https://www.youtube.com/@AllenMeow',
'uploader_id': '@AllenMeow',
+ 'timestamp': 1299776999,
},
},
# url_encoded_fmt_stream_map is empty string
@@ -1784,6 +1800,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
}],
'params': {'skip_download': True},
+ 'skip': 'Not multifeed anymore',
},
{
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
@@ -1892,6 +1909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'The Berkman Klein Center for Internet & Society',
'uploader_id': '@BKCHarvard',
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
+ 'timestamp': 1422422076,
},
'params': {
'skip_download': True,
@@ -1927,6 +1945,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@BernieSanders',
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1447987198,
},
'params': {
'skip_download': True,
@@ -1990,6 +2009,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Vsauce',
'comment_count': int,
'channel_is_verified': True,
+ 'timestamp': 1484761047,
},
'params': {
'skip_download': True,
@@ -2145,6 +2165,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'l\'Or Vert asbl',
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
'uploader_id': '@ElevageOrVert',
+ 'timestamp': 1497343210,
},
'params': {
'skip_download': True,
@@ -2183,6 +2204,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Csharp-video-tutorialsBlogspot',
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1377976349,
},
'params': {
'skip_download': True,
@@ -2265,6 +2287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@CBSMornings',
'comment_count': int,
'channel_is_verified': True,
+ 'timestamp': 1405513526,
}
},
{
@@ -2282,7 +2305,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': int,
'channel': 'Walk around Japan',
'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
- 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
+ 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
'age_limit': 0,
'availability': 'public',
'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
@@ -2292,6 +2315,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Walk around Japan',
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
'uploader_id': '@walkaroundjapan7124',
+ 'timestamp': 1605884416,
},
'params': {
'skip_download': True,
@@ -2343,6 +2367,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '17', # 3gp format available on android
'extractor_args': {'youtube': {'player_client': ['android']}},
},
+ 'skip': 'android client broken',
},
{
# Skip download of additional client configs (remix client config in this case)
@@ -2386,6 +2411,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1395685455,
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2415,38 +2441,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
+ 'timestamp': 1641170939,
}
}, {
- # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
- 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
- 'info_dict': {
- 'id': '2NUZ8W2llS4',
- 'ext': 'mp4',
- 'title': 'The NP that test your phone performance 🙂',
- 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
- 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
- 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
- 'duration': 21,
- 'view_count': int,
- 'age_limit': 0,
- 'categories': ['Gaming'],
- 'tags': 'count:23',
- 'playable_in_embed': True,
- 'live_status': 'not_live',
- 'upload_date': '20220102',
- 'like_count': int,
- 'availability': 'public',
- 'channel': 'Leon Nguyen',
- 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
- 'comment_count': int,
- 'channel_follower_count': int,
- 'uploader': 'Leon Nguyen',
- 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
- 'uploader_id': '@LeonNguyen',
- 'heatmap': 'count:100',
- },
- 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
- }, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)
'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
'info_dict': {
@@ -2477,38 +2474,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1641172509,
}
},
- { # continuous livestream. Microformat upload date should be preferred.
- # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
- 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
+ { # continuous livestream.
+ # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
+ 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
'info_dict': {
- 'id': 'kgx4WGK0oNU',
- 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'id': 'jfKfPfyJRdk',
'ext': 'mp4',
- 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
- 'availability': 'public',
+ 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
+ 'like_count': int,
+ 'uploader': 'Lofi Girl',
+ 'categories': ['Music'],
+ 'concurrent_view_count': int,
+ 'playable_in_embed': True,
+ 'timestamp': 1657627949,
+ 'release_date': '20220712',
+ 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
+ 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
'age_limit': 0,
- 'release_timestamp': 1637975704,
- 'upload_date': '20210619',
- 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
- 'live_status': 'is_live',
- 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
- 'channel': 'Abao in Tokyo',
+ 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
+ 'release_timestamp': 1657641570,
+ 'uploader_url': 'https://www.youtube.com/@LofiGirl',
'channel_follower_count': int,
- 'release_date': '20211127',
- 'tags': 'count:39',
- 'categories': ['People & Blogs'],
- 'like_count': int,
+ 'channel_is_verified': True,
+ 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
'view_count': int,
- 'playable_in_embed': True,
- 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
- 'concurrent_view_count': int,
- 'uploader': 'Abao in Tokyo',
- 'uploader_url': 'https://www.youtube.com/@abaointokyo',
- 'uploader_id': '@abaointokyo',
+ 'live_status': 'is_live',
+ 'tags': 'count:32',
+ 'channel': 'Lofi Girl',
+ 'availability': 'public',
+ 'upload_date': '20220712',
+ 'uploader_id': '@LofiGirl',
},
- 'params': {'skip_download': True}
+ 'params': {'skip_download': True},
}, {
'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
'info_dict': {
@@ -2534,6 +2534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@lesmiscore',
'uploader': 'Lesmiscore',
'uploader_url': 'https://www.youtube.com/@lesmiscore',
+ 'timestamp': 1648005313,
}
}, {
# Prefer primary title+description language metadata by default
@@ -2561,6 +2562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
+ 'timestamp': 1662677394,
},
'params': {'skip_download': True}
}, {
@@ -2574,7 +2576,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': 5,
'live_status': 'not_live',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
- 'upload_date': '20220728',
+ 'upload_date': '20220729',
'view_count': int,
'categories': ['People & Blogs'],
'thumbnail': r're:^https?://.*\.jpg',
@@ -2587,6 +2589,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
+ 'timestamp': 1659073275,
+ 'like_count': int,
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],
@@ -2652,6 +2656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Projekt Melody',
'uploader_id': '@ProjektMelody',
'uploader_url': 'https://www.youtube.com/@ProjektMelody',
+ 'timestamp': 1577508724,
},
'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
},
@@ -2686,6 +2691,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@sana_natori',
'channel_is_verified': True,
'heatmap': 'count:100',
+ 'timestamp': 1671798112,
},
},
{
@@ -2720,7 +2726,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'heatmap': 'count:100',
},
'params': {
- 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
+ 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
},
},
]
@@ -2755,6 +2761,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
+ 'timestamp': 1211825920,
},
'params': {
'skip_download': True,
@@ -3307,7 +3314,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'value': ('intensityScoreNormalized', {float_or_none}),
})) or None
- def _extract_comment(self, comment_renderer, parent=None):
+ def _extract_comment(self, entities, parent=None):
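+ # New (2024) comment layout: the data is split across commentEntityPayload and
+ # engagementToolbarStateEntityPayload entities taken from frameworkUpdates mutations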
+ comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
+ if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
+ return
+
+ toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
+ time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
+
+ return {
+ 'id': comment_id,
+ 'parent': parent or 'root',
+ **traverse_obj(comment_entity_payload, {
+ 'text': ('properties', 'content', 'content', {str}),
+ 'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
+ 'author_id': ('author', 'channelId', {self.ucid_or_none}),
+ 'author': ('author', 'displayName', {str}),
+ 'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
+ 'author_is_uploader': ('author', 'isCreator', {bool}),
+ 'author_is_verified': ('author', 'isVerified', {bool}),
+ 'author_url': ('author', 'channelCommand', 'innertubeCommand', (
+ ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url')
+ ), {lambda x: urljoin('https://www.youtube.com', x)}),
+ }, get_all=False),
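+ # Whether the uploader "hearted" the comment; unknown if the toolbar entity is missing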
+ 'is_favorited': (None if toolbar_entity_payload is None else
+ toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
+ '_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate.
+ 'timestamp': self._parse_time_text(time_text),
+ }
+
+ def _extract_comment_old(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
if not comment_id:
return
@@ -3388,21 +3424,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
return _continuation
- def extract_thread(contents):
+ def extract_thread(contents, entity_payloads):
if not parent:
tracker['current_page_thread'] = 0
for content in contents:
if not parent and tracker['total_parent_comments'] >= max_parents:
yield
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
- comment_renderer = get_first(
- (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
- expected_type=dict, default={})
- comment = self._extract_comment(comment_renderer, parent)
+ # old comment format
+ if not entity_payloads:
+ comment_renderer = get_first(
+ (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
+ expected_type=dict, default={})
+
+ comment = self._extract_comment_old(comment_renderer, parent)
+
+ # new comment format
+ else:
+ view_model = (
+ traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
+ or traverse_obj(content, ('commentViewModel', {dict})))
+ comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
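+ # These keys reference entityKey values of the payloads in entity_payloads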
+ if not comment_keys:
+ continue
+ entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
+ comment = self._extract_comment(entities, parent)
+ if comment:
+ comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
+
if not comment:
continue
comment_id = comment['id']
+
if comment.get('is_pinned'):
tracker['pinned_comment_ids'].add(comment_id)
# Sometimes YouTube may break and give us infinite looping comments.
@@ -3495,7 +3549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
check_get_keys = None
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
check_get_keys = [[*continuation_items_path, ..., (
- 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
+ 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
try:
response = self._extract_response(
item_id=None, query=continuation,
@@ -3519,6 +3573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise
is_forced_continuation = False
continuation = None
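+ # Entity payloads for the new comment format (consumed by _extract_comment)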
+ mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
if is_first_continuation:
continuation = extract_header(continuation_items)
@@ -3527,7 +3582,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
continue
- for entry in extract_thread(continuation_items):
+ for entry in extract_thread(continuation_items, mutations):
if not entry:
return
yield entry
@@ -3604,8 +3659,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
yt_query = {
'videoId': video_id,
}
- if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
- yt_query['params'] = 'CgIIAQ=='
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
if pp_arg:
@@ -3621,19 +3674,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
- default = ['ios', 'android', 'web']
+ android_clients = []
+ default = ['ios', 'web']
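+ # 'android' is no longer a default client; its formats are currently broken
+ # (see https://github.com/yt-dlp/yt-dlp/issues/9554)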
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
for client in self._configuration_arg('player_client'):
- if client in allowed_clients:
- requested_clients.append(client)
- elif client == 'default':
+ if client == 'default':
requested_clients.extend(default)
elif client == 'all':
requested_clients.extend(allowed_clients)
- else:
+ elif client not in allowed_clients:
self.report_warning(f'Skipping unsupported client {client}')
+ elif client.startswith('android'):
+ android_clients.append(client)
+ else:
+ requested_clients.append(client)
+ # Force deprioritization of broken Android clients for format de-duplication
+ requested_clients.extend(android_clients)
if not requested_clients:
requested_clients = default
@@ -3852,6 +3910,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+ # Android client formats are broken due to integrity check enforcement
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
+ is_broken = client_name and client_name.startswith(short_client_name('android'))
+ if is_broken:
+ self.report_warning(
+ f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
+ 'They will be deprioritized', only_once=True)
+
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
dct = {
@@ -3864,7 +3930,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
- throttled and 'THROTTLED', is_damaged and 'DAMAGED',
+ throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
@@ -3882,8 +3948,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else '') or None,
'language_preference': language_preference,
- # Strictly de-prioritize damaged and 3gp formats
- 'preference': -10 if is_damaged else -2 if itag == '17' else None,
+ # Strictly de-prioritize broken, damaged and 3gp formats
+ 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -4552,19 +4618,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})
+
+ # We only want timestamp IF it has time precision AND a timezone
+ # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
+ timestamp = (
+ parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
+ or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
+ )
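+ # NB: values without an explicit timezone (e.g. a date-only '2021-11-27') yield None here,
+ # so we fall back to unified_strdate below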
+ upload_date = (
+ dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
+ (
+ unified_strdate(get_first(microformats, 'uploadDate'))
+ or unified_strdate(search_meta('uploadDate'))
+ ))
+
+ # If we cannot get the timestamp:
# The upload date for scheduled, live and past live streams / premieres in microformats
# may be different from the stream date. Although not in UTC, we will prefer it in this case.
# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
- upload_date = (
- unified_strdate(get_first(microformats, 'uploadDate'))
- or unified_strdate(search_meta('uploadDate')))
- if not upload_date or (
- live_status in ('not_live', None)
- and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
- ):
+ if not upload_date or (not timestamp and live_status in ('not_live', None)):
+ # this should be in UTC, as configured in the cookie/client context
upload_date = strftime_or_none(
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
+
info['upload_date'] = upload_date
+ info['timestamp'] = timestamp
if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
# Newly uploaded videos' HLS formats are potentially problematic and need to be checked
diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py
index 88f526b..2a12aa5 100644
--- a/yt_dlp/extractor/zapiks.py
+++ b/yt_dlp/extractor/zapiks.py
@@ -2,11 +2,11 @@ import re
from .common import InfoExtractor
from ..utils import (
+ int_or_none,
parse_duration,
parse_iso8601,
- xpath_with_ns,
xpath_text,
- int_or_none,
+ xpath_with_ns,
)
diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py
index c24b338..18b22a5 100644
--- a/yt_dlp/extractor/zhihu.py
+++ b/yt_dlp/extractor/zhihu.py
@@ -1,5 +1,5 @@
from .common import InfoExtractor
-from ..utils import format_field, float_or_none, int_or_none
+from ..utils import float_or_none, format_field, int_or_none
class ZhihuIE(InfoExtractor):
diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
index ff5eac8..909a7a3 100644
--- a/yt_dlp/extractor/zingmp3.py
+++ b/yt_dlp/extractor/zingmp3.py
@@ -10,8 +10,8 @@ from ..utils import (
int_or_none,
join_nonempty,
try_call,
+ url_or_none,
urljoin,
- url_or_none
)
from ..utils.traversal import traverse_obj
diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py
index 2f3b4c4..8d3156d 100644
--- a/yt_dlp/extractor/zype.py
+++ b/yt_dlp/extractor/zype.py
@@ -3,8 +3,8 @@ import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
- dict_get,
ExtractorError,
+ dict_get,
int_or_none,
js_to_json,
parse_iso8601,
diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py
index 39d1f70..f2df399 100644
--- a/yt_dlp/networking/_curlcffi.py
+++ b/yt_dlp/networking/_curlcffi.py
@@ -21,7 +21,7 @@ from .exceptions import (
TransportError,
)
from .impersonate import ImpersonateRequestHandler, ImpersonateTarget
-from ..dependencies import curl_cffi
+from ..dependencies import certifi, curl_cffi
from ..utils import int_or_none
if curl_cffi is None:
@@ -132,6 +132,16 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
extensions.pop('cookiejar', None)
extensions.pop('timeout', None)
+ def send(self, request: Request) -> Response:
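+ # Attach the resolved impersonate target to the response, including error responses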
+ target = self._get_request_target(request)
+ try:
+ response = super().send(request)
+ except HTTPError as e:
+ e.response.extensions['impersonate'] = target
+ raise
+ response.extensions['impersonate'] = target
+ return response
+
def _send(self, request: Request):
max_redirects_exceeded = False
session: curl_cffi.requests.Session = self._get_instance(
@@ -156,6 +166,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
# See: https://curl.se/libcurl/c/CURLOPT_HTTPPROXYTUNNEL.html
session.curl.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)
+ # curl_cffi does not currently set these for proxies
+ session.curl.setopt(CurlOpt.PROXY_CAINFO, certifi.where())
+
+ if not self.verify:
+ session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYPEER, 0)
+ session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYHOST, 0)
+
headers = self._get_impersonate_headers(request)
if self._client_cert:
@@ -203,7 +220,10 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
max_redirects_exceeded = True
curl_response = e.response
- elif e.code == CurlECode.PROXY:
+ elif (
+ e.code == CurlECode.PROXY
+ or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e))
+ ):
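+ # Proxy auth failures after CONNECT may surface as RECV_ERROR rather than a PROXY error code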
raise ProxyError(cause=e) from e
else:
raise TransportError(cause=e) from e
diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py
index e3edc77..6397a2c 100644
--- a/yt_dlp/networking/_requests.py
+++ b/yt_dlp/networking/_requests.py
@@ -28,6 +28,7 @@ import requests.adapters
import requests.utils
import urllib3.connection
import urllib3.exceptions
+import urllib3.util
from ._helper import (
InstanceStoreMixin,
@@ -180,10 +181,25 @@ class RequestsHTTPAdapter(requests.adapters.HTTPAdapter):
extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context
return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs)
+ # Skip `requests` internal verification; we use our own SSLContext
+ # requests 2.31.0+
def cert_verify(*args, **kwargs):
- # lean on SSLContext for cert verification
pass
+ # requests 2.31.0-2.32.1
+ def _get_connection(self, request, *_, proxies=None, **__):
+ return self.get_connection(request.url, proxies)
+
+ # requests 2.32.2+: Reimplementation without `_urllib3_request_context`
+ def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None):
+ url = urllib3.util.parse_url(request.url).url
+
+ manager = self.poolmanager
+ if proxy := select_proxy(url, proxies):
+ manager = self.proxy_manager_for(proxy)
+
+ return manager.connection_from_url(url)
+
class RequestsSession(requests.sessions.Session):
"""
diff --git a/yt_dlp/networking/common.py b/yt_dlp/networking/common.py
index 4c66ba6..d473e16 100644
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@@ -31,6 +31,8 @@ from ..utils import (
)
from ..utils.networking import HTTPHeaderDict, normalize_url
+DEFAULT_TIMEOUT = 20
+
def register_preference(*handlers: type[RequestHandler]):
assert all(issubclass(handler, RequestHandler) for handler in handlers)
@@ -235,7 +237,7 @@ class RequestHandler(abc.ABC):
self._logger = logger
self.headers = headers or {}
self.cookiejar = cookiejar if cookiejar is not None else YoutubeDLCookieJar()
- self.timeout = float(timeout or 20)
+ self.timeout = float(timeout or DEFAULT_TIMEOUT)
self.proxies = proxies or {}
self.source_address = source_address
self.verbose = verbose
@@ -497,6 +499,7 @@ class Response(io.IOBase):
@param headers: response headers.
@param status: Response HTTP status code. Default is 200 OK.
@param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
+ @param extensions: Dictionary of handler-specific response extensions.
"""
def __init__(
@@ -505,7 +508,9 @@ class Response(io.IOBase):
url: str,
headers: Mapping[str, str],
status: int = 200,
- reason: str = None):
+ reason: str = None,
+ extensions: dict = None
+ ):
self.fp = fp
self.headers = Message()
@@ -517,6 +522,7 @@ class Response(io.IOBase):
self.reason = reason or HTTPStatus(status).phrase
except ValueError:
self.reason = None
+ self.extensions = extensions or {}
def readable(self):
return self.fp.readable()
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index faa1ee5..997b575 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -478,7 +478,7 @@ def create_parser():
}, 'aliases': {
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
- '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
+ '2021': ['2022', 'no-certifi', 'filename-sanitization'],
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
'2023': [],
}
diff --git a/yt_dlp/update.py b/yt_dlp/update.py
index f47cbc5..ca70f69 100644
--- a/yt_dlp/update.py
+++ b/yt_dlp/update.py
@@ -69,6 +69,10 @@ def _get_variant_and_executable_path():
# Ref: https://en.wikipedia.org/wiki/Uname#Examples
if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
+ # sys.executable returns a /tmp/ path for staticx builds (linux_static)
+ # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
+ if static_exe_path := os.getenv('STATICX_PROG_PATH'):
+ path = static_exe_path
return f'{remove_end(sys.platform, "32")}{machine}_exe', path
path = os.path.dirname(__file__)
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index e3e80f3..42803bb 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -1134,7 +1134,7 @@ def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))
-def extract_timezone(date_str):
+def extract_timezone(date_str, default=None):
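+ # When no timezone is found, returns `default` (UTC if None); pass NO_DEFAULT to get None instead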
m = re.search(
r'''(?x)
^.{8,}? # >=8 char non-TZ prefix, if present
@@ -1146,21 +1146,25 @@ def extract_timezone(date_str):
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
$)
''', date_str)
+ timezone = None
+
if not m:
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
if timezone is not None:
date_str = date_str[:-len(m.group('tz'))]
- timezone = dt.timedelta(hours=timezone or 0)
+ timezone = dt.timedelta(hours=timezone)
else:
date_str = date_str[:-len(m.group('tz'))]
- if not m.group('sign'):
- timezone = dt.timedelta()
- else:
+ if m.group('sign'):
sign = 1 if m.group('sign') == '+' else -1
timezone = dt.timedelta(
hours=sign * int(m.group('hours')),
minutes=sign * int(m.group('minutes')))
+
+ if timezone is None and default is not NO_DEFAULT:
+ timezone = default or dt.timedelta()
+
return timezone, date_str
@@ -1172,10 +1176,9 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
date_str = re.sub(r'\.[0-9]+', '', date_str)
- if timezone is None:
- timezone, date_str = extract_timezone(date_str)
+ timezone, date_str = extract_timezone(date_str, timezone)
- with contextlib.suppress(ValueError):
+ with contextlib.suppress(ValueError, TypeError):
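+ # TypeError: timezone is None when NO_DEFAULT was passed and no timezone was present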
date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
dt_ = dt.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt_.timetuple())
@@ -1638,16 +1641,14 @@ def get_filesystem_encoding():
return encoding if encoding is not None else 'utf-8'
-_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
_CMD_QUOTE_TRANS = str.maketrans({
# Keep quotes balanced by replacing them with `""` instead of `\\"`
'"': '""',
- # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+ # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
# `=` should be unique since variables containing `=` cannot be set using cmd
'\n': '%=%',
- # While we are only required to escape backslashes immediately before quotes,
- # we instead escape all of 'em anyways to be consistent
- '\\': '\\\\',
+ '\r': '%=%',
# Use zero length variable replacement so `%` doesn't get expanded
# `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
'%': '%%cd:~,%',
@@ -1656,19 +1657,14 @@ _CMD_QUOTE_TRANS = str.maketrans({
def shell_quote(args, *, shell=False):
args = list(variadic(args))
- if any(isinstance(item, bytes) for item in args):
- deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
- encoding = get_filesystem_encoding()
- for index, item in enumerate(args):
- if isinstance(item, bytes):
- args[index] = item.decode(encoding)
if compat_os_name != 'nt':
return shlex.join(args)
trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
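+ # Double any backslash run preceding a quote (or the end) so the enclosing `""` added below is not escaped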
return ' '.join(
- s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
+ s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
+ else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
for s in args)
@@ -2529,7 +2525,7 @@ def read_batch_urls(batch_fd):
return False
# "#" cannot be stripped out since it is part of the URI
# However, it can be safely stripped out if following a whitespace
- return re.split(r'\s#', url, 1)[0].rstrip()
+ return re.split(r'\s#', url, maxsplit=1)[0].rstrip()
with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url]
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 22c2c04..415dc0e 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2024.04.09'
+__version__ = '2024.05.26'
-RELEASE_GIT_HEAD = 'ff07792676f404ffff6ee61b5638c9dc1a33a37a'
+RELEASE_GIT_HEAD = 'ae2af1104f80caf2f47544763a33db2c17a3e1de'
VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp'
-_pkg_version = '2024.04.09'
+_pkg_version = '2024.05.26'