diff options
Diffstat (limited to 'yt_dlp/extractor/nbc.py')
-rw-r--r-- | yt_dlp/extractor/nbc.py | 29 |
1 files changed, 14 insertions, 15 deletions
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index e88f98a..8f6fb22 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -1,12 +1,12 @@ import base64 import json import re +import urllib.parse import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor from .theplatform import ThePlatformIE, default_ns -from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..utils import ( ExtractorError, @@ -16,6 +16,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + join_nonempty, mimetype2ext, parse_age_limit, parse_duration, @@ -148,12 +149,12 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE # Percent escaped url 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189', 'only_matching': True, - } + }, ] def _real_extract(self, url): permalink, video_id = self._match_valid_url(url).groups() - permalink = 'http' + compat_urllib_parse_unquote(permalink) + permalink = 'http' + urllib.parse.unquote(permalink) video_data = self._download_json( 'https://friendship.nbc.co/v2/graphql', video_id, query={ 'query': '''query bonanzaPage( @@ -201,7 +202,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'switch': 'HLSServiceSecure', } video_id = video_data['mpxGuid'] - tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id) + tp_path = 'NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id) tpm = self._download_theplatform_metadata(tp_path, video_id) title = tpm.get('title') or video_data.get('secondaryTitle') if video_data.get('locked'): @@ -211,7 +212,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE query['auth'] = self._extract_mvpd_auth( url, video_id, 'nbcentertainment', resource) theplatform_url = smuggle_url(update_url_query( - 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id), + 'http://link.theplatform.com/s/NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id), query), {'force_smil_url': True}) # Empty string or 0 can be valid values for these. So the check must be `is None` @@ -253,7 +254,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/' _VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' - _EMBED_REGEX = [r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE] + _EMBED_REGEX = [rf'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>{_VALID_URL_BASE}[^\"]+)'] _TESTS = [{ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI', @@ -267,8 +268,8 @@ class NBCSportsVPlayerIE(InfoExtractor): 'uploader': 'NBCU-SPORTS', 'duration': 72.818, 'chapters': [], - 'thumbnail': r're:^https?://.*\.jpg$' - } + 'thumbnail': r're:^https?://.*\.jpg$', + }, }, { 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2', 'only_matching': True, @@ -301,7 +302,7 @@ class NBCSportsIE(InfoExtractor): 'chapters': [], 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg', 'duration': 528.395, - } + }, }, { # data-mpx-src 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot', @@ -339,7 +340,7 @@ class NBCSportsStreamIE(AdobePassIE): def _real_extract(self, url): video_id = self._match_id(url) live_source = self._download_json( - 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id, + f'http://stream.nbcsports.com/data/live_sources_{video_id}.json', video_id) video_source = live_source['videoSources'][0] title = video_source['title'] @@ -498,10 +499,8 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE m3u8_id=format_id, fatal=False)) continue tbr = int_or_none(va.get('bitrate'), 1000) - if tbr: - format_id += '-%d' % tbr formats.append({ - 'format_id': format_id, + 'format_id': join_nonempty(format_id, tbr), 'url': public_url, 'width': int_or_none(va.get('width')), 'height': int_or_none(va.get('height')), @@ -568,7 +567,7 @@ class NBCOlympicsIE(InfoExtractor): except RegexNotFoundError: theplatform_url = self._search_regex( r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2", - webpage, 'embedding URL', group="embedUrl") + webpage, 'embedding URL', group='embedUrl') return { '_type': 'url_transparent', @@ -623,7 +622,7 @@ class NBCOlympicsStreamIE(AdobePassIE): source_url = self._download_json( f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging', - pid, 'Downloading leap config' + pid, 'Downloading leap config', )['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl'] if event_config.get('cdnToken'): |