summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/mlb.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:07:33 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:07:33 +0000
commit: 1719c758f7efdca9476d2a674b659191dee6344f (patch)
tree: d378103d8967450628b7254d9ec1f4f91993bd04 /yt_dlp/extractor/mlb.py
parent: Adding debian version 2024.07.16-1. (diff)
download: yt-dlp-1719c758f7efdca9476d2a674b659191dee6344f.tar.xz
yt-dlp-1719c758f7efdca9476d2a674b659191dee6344f.zip
Merging upstream version 2024.07.25.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/mlb.py')
-rw-r--r-- yt_dlp/extractor/mlb.py 225
1 files changed, 181 insertions, 44 deletions
diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py
index 6f67602..230c218 100644
--- a/yt_dlp/extractor/mlb.py
+++ b/yt_dlp/extractor/mlb.py
@@ -1,16 +1,21 @@
+import json
import re
-import urllib.parse
+import time
import uuid
from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
determine_ext,
int_or_none,
join_nonempty,
+ jwt_decode_hs256,
parse_duration,
parse_iso8601,
try_get,
url_or_none,
+ urlencode_postdata,
)
from ..utils.traversal import traverse_obj
@@ -276,81 +281,213 @@ class MLBVideoIE(MLBBaseIE):
class MLBTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
_NETRC_MACHINE = 'mlb'
-
_TESTS = [{
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
'info_dict': {
'id': '661581',
'ext': 'mp4',
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
+ 'release_date': '20220702',
+ 'release_timestamp': 1656792300,
},
'params': {
'skip_download': True,
},
}]
+ _GRAPHQL_INIT_QUERY = '''\
+mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
+ initSession(device: $device, clientType: $clientType, experience: $experience) {
+ deviceId
+ sessionId
+ entitlements {
+ code
+ }
+ location {
+ countryCode
+ regionName
+ zipCode
+ latitude
+ longitude
+ }
+ clientExperience
+ features
+ }
+ }'''
+ _GRAPHQL_PLAYBACK_QUERY = '''\
+mutation initPlaybackSession(
+ $adCapabilities: [AdExperienceType]
+ $mediaId: String!
+ $deviceId: String!
+ $sessionId: String!
+ $quality: PlaybackQuality
+ ) {
+ initPlaybackSession(
+ adCapabilities: $adCapabilities
+ mediaId: $mediaId
+ deviceId: $deviceId
+ sessionId: $sessionId
+ quality: $quality
+ ) {
+ playbackSessionId
+ playback {
+ url
+ token
+ expiration
+ cdn
+ }
+ }
+ }'''
+ _APP_VERSION = '7.8.2'
+ _device_id = None
+ _session_id = None
_access_token = None
+ _token_expiry = 0
+
+ @property
+ def _api_headers(self):
+ if (self._token_expiry - 120) <= time.time():
+ self.write_debug('Access token has expired; re-logging in')
+ self._perform_login(*self._get_login_info())
+ return {'Authorization': f'Bearer {self._access_token}'}
def _real_initialize(self):
if not self._access_token:
self.raise_login_required(
'All videos are only available to registered users', method='password')
- def _perform_login(self, username, password):
- data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
- access_token = self._download_json(
- 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
- headers={
- 'User-Agent': 'okhttp/3.12.1',
- 'Content-Type': 'application/x-www-form-urlencoded',
- }, data=data.encode())['access_token']
+ def _set_device_id(self, username):
+ if not self._device_id:
+ self._device_id = self.cache.load(
+ self._NETRC_MACHINE, 'device_ids', default={}).get(username)
+ if self._device_id:
+ return
+ self._device_id = str(uuid.uuid4())
+ self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
- entitlement = self._download_webpage(
- f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
- headers={
- 'User-Agent': 'okhttp/3.12.1',
- 'Authorization': f'Bearer {access_token}',
- })
+ def _perform_login(self, username, password):
+ try:
+ self._access_token = self._download_json(
+ 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
+ 'Logging in', 'Unable to log in', headers={
+ 'User-Agent': 'okhttp/3.12.1',
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ }, data=urlencode_postdata({
+ 'grant_type': 'password',
+ 'username': username,
+ 'password': password,
+ 'scope': 'openid offline_access',
+ 'client_id': '0oa3e1nutA1HLzAKG356',
+ }))['access_token']
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 400:
+ raise ExtractorError('Invalid username or password', expected=True)
+ raise
+
+ self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
+ self._set_device_id(username)
+
+ self._session_id = self._call_api({
+ 'operationName': 'initSession',
+ 'query': self._GRAPHQL_INIT_QUERY,
+ 'variables': {
+ 'device': {
+ 'appVersion': self._APP_VERSION,
+ 'deviceFamily': 'desktop',
+ 'knownDeviceId': self._device_id,
+ 'languagePreference': 'ENGLISH',
+ 'manufacturer': '',
+ 'model': '',
+ 'os': '',
+ 'osVersion': '',
+ },
+ 'clientType': 'WEB',
+ },
+ }, None, 'session ID')['data']['initSession']['sessionId']
- data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
- self._access_token = self._download_json(
- 'https://us.edge.bamgrid.com/token', None,
+ def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
+ return self._download_json(
+ 'https://media-gateway.mlb.com/graphql', video_id,
+ f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
headers={
+ **self._api_headers,
'Accept': 'application/json',
- 'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
- 'Content-Type': 'application/x-www-form-urlencoded',
- }, data=data.encode())['access_token']
+ 'Content-Type': 'application/json',
+ 'x-client-name': 'WEB',
+ 'x-client-version': self._APP_VERSION,
+ }, data=json.dumps(data, separators=(',', ':')).encode())
+
+ def _extract_formats_and_subtitles(self, broadcast, video_id):
+ feed = traverse_obj(broadcast, ('homeAway', {str.title}))
+ medium = traverse_obj(broadcast, ('type', {str}))
+ language = traverse_obj(broadcast, ('language', {str.lower}))
+ format_id = join_nonempty(feed, medium, language)
+
+ response = self._call_api({
+ 'operationName': 'initPlaybackSession',
+ 'query': self._GRAPHQL_PLAYBACK_QUERY,
+ 'variables': {
+ 'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
+ 'deviceId': self._device_id,
+ 'mediaId': broadcast['mediaId'],
+ 'quality': 'PLACEHOLDER',
+ 'sessionId': self._session_id,
+ },
+ }, video_id, f'{format_id} broadcast JSON', fatal=False)
+
+ playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
+ m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
+ token = traverse_obj(playback, ('token', {str}))
+
+ if not (m3u8_url and token):
+ errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
+ if 'not entitled' in errors:
+ raise ExtractorError(errors, expected=True)
+ elif errors: # Only warn when 'blacked out' since radio formats are available
+ self.report_warning(f'API returned errors for {format_id}: {errors}')
+ else:
+ self.report_warning(f'No formats available for {format_id} broadcast; skipping')
+ return [], {}
+
+ cdn_headers = {'x-cdn-token': token}
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4',
+ m3u8_id=format_id, fatal=False, headers=cdn_headers)
+ for fmt in fmts:
+ fmt['http_headers'] = cdn_headers
+ fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
+ fmt.setdefault('language', language)
+ if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
+ fmt['source_preference'] = 10
+
+ return fmts, subs
def _real_extract(self, url):
video_id = self._match_id(url)
- airings = self._download_json(
- f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
- video_id)['data']['Airings']
+ metadata = traverse_obj(self._download_json(
+ 'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
+ 'gamePk': video_id,
+ 'hydrate': 'broadcasts(all),statusFlags',
+ }), ('dates', ..., 'games', lambda _, v: str(v['gamePk']) == video_id and v['broadcasts'], any))
+
+ broadcasts = traverse_obj(metadata, (
+ 'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
formats, subtitles = [], {}
- for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
- format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
- m3u8_url = traverse_obj(self._download_json(
- airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
- note=f'Downloading {format_id} stream info JSON',
- errnote=f'Failed to download {format_id} stream info, skipping',
- fatal=False, headers={
- 'Authorization': self._access_token,
- 'Accept': 'application/vnd.media-service+json; version=2',
- }), ('stream', 'complete', {url_or_none}))
- if not m3u8_url:
- continue
- f, s = self._extract_m3u8_formats_and_subtitles(
- m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
- formats.extend(f)
- self._merge_subtitles(s, target=subtitles)
+ for broadcast in broadcasts:
+ fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
return {
'id': video_id,
- 'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
- 'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
+ 'title': join_nonempty(
+ traverse_obj(metadata, ('officialDate', {str})),
+ traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
+ delim=' - '),
+ 'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
+ 'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
'formats': formats,
'subtitles': subtitles,
- 'http_headers': {'Authorization': f'Bearer {self._access_token}'},
}