diff options
Diffstat (limited to 'yt_dlp/extractor/nebula.py')
-rw-r--r-- | yt_dlp/extractor/nebula.py | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py new file mode 100644 index 0000000..81e2f56 --- /dev/null +++ b/yt_dlp/extractor/nebula.py @@ -0,0 +1,274 @@ +import itertools +import json +import urllib.error + +from .common import InfoExtractor +from ..utils import ExtractorError, parse_iso8601 + +_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)' + + +class NebulaBaseIE(InfoExtractor): + _NETRC_MACHINE = 'watchnebula' + + _nebula_api_token = None + _nebula_bearer_token = None + + def _perform_nebula_auth(self, username, password): + if not username or not password: + self.raise_login_required(method='password') + + data = json.dumps({'email': username, 'password': password}).encode('utf8') + response = self._download_json( + 'https://api.watchnebula.com/api/v1/auth/login/', + data=data, fatal=False, video_id=None, + headers={ + 'content-type': 'application/json', + # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint + 'cookie': '' + }, + note='Logging in to Nebula with supplied credentials', + errnote='Authentication failed or rejected') + if not response or not response.get('key'): + self.raise_login_required(method='password') + + return response['key'] + + def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''): + assert method in ('GET', 'POST',) + assert auth_type in ('api', 'bearer',) + + def inner_call(): + authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}' + return self._download_json( + url, video_id, note=note, headers={'Authorization': authorization}, + data=b'' if method == 'POST' else None) + + try: + return inner_call() + except ExtractorError as exc: + # if 401 or 403, attempt credential re-auth and retry + if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403): + self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}') + self._perform_login() + return inner_call() + else: + raise + + def _fetch_nebula_bearer_token(self): + """ + Get a Bearer token for the Nebula API. This will be required to fetch video meta data. + """ + response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/', + method='POST', + note='Authorizing to Nebula') + return response['token'] + + def _fetch_video_formats(self, slug): + stream_info = self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/stream/', + video_id=slug, + auth_type='bearer', + note='Fetching video stream info') + manifest_url = stream_info['manifest'] + return self._extract_m3u8_formats_and_subtitles(manifest_url, slug) + + def _build_video_info(self, episode): + fmts, subs = self._fetch_video_formats(episode['slug']) + channel_slug = episode['channel_slug'] + channel_title = episode['channel_title'] + return { + 'id': episode['zype_id'], + 'display_id': episode['slug'], + 'formats': fmts, + 'subtitles': subs, + 'webpage_url': f'https://nebula.tv/{episode["slug"]}', + 'title': episode['title'], + 'description': episode['description'], + 'timestamp': parse_iso8601(episode['published_at']), + 'thumbnails': [{ + # 'id': tn.get('name'), # this appears to be null + 'url': tn['original'], + 'height': key, + } for key, tn in episode['assets']['thumbnail'].items()], + 'duration': episode['duration'], + 'channel': channel_title, + 'channel_id': channel_slug, + 'channel_url': f'https://nebula.tv/{channel_slug}', + 'uploader': channel_title, + 'uploader_id': channel_slug, + 'uploader_url': f'https://nebula.tv/{channel_slug}', + 'series': channel_title, + 'creator': channel_title, + } + + def _perform_login(self, username=None, password=None): + self._nebula_api_token = self._perform_nebula_auth(username, password) + self._nebula_bearer_token = self._fetch_nebula_bearer_token() + + +class NebulaIE(NebulaBaseIE): + _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)' + _TESTS = [ + { + 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast', + 'md5': '14944cfee8c7beeea106320c47560efc', + 'info_dict': { + 'id': '5c271b40b13fd613090034fd', + 'ext': 'mp4', + 'title': 'That Time Disney Remade Beauty and the Beast', + 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.', + 'upload_date': '20180731', + 'timestamp': 1533009600, + 'channel': 'Lindsay Ellis', + 'channel_id': 'lindsayellis', + 'uploader': 'Lindsay Ellis', + 'uploader_id': 'lindsayellis', + 'timestamp': 1533009600, + 'uploader_url': 'https://nebula.tv/lindsayellis', + 'series': 'Lindsay Ellis', + 'display_id': 'that-time-disney-remade-beauty-and-the-beast', + 'channel_url': 'https://nebula.tv/lindsayellis', + 'creator': 'Lindsay Ellis', + 'duration': 2212, + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', + }, + }, + { + 'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', + 'md5': 'd05739cf6c38c09322422f696b569c23', + 'info_dict': { + 'id': '5e7e78171aaf320001fbd6be', + 'ext': 'mp4', + 'title': 'Landing Craft - How The Allies Got Ashore', + 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.', + 'upload_date': '20200327', + 'timestamp': 1585348140, + 'channel': 'Real Engineering', + 'channel_id': 'realengineering', + 'uploader': 'Real Engineering', + 'uploader_id': 'realengineering', + 'series': 'Real Engineering', + 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', + 'creator': 'Real Engineering', + 'duration': 841, + 'channel_url': 'https://nebula.tv/realengineering', + 'uploader_url': 'https://nebula.tv/realengineering', + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', + }, + }, + { + 'url': 'https://nebula.tv/videos/money-episode-1-the-draw', + 'md5': 'ebe28a7ad822b9ee172387d860487868', + 'info_dict': { + 'id': '5e779ebdd157bc0001d1c75a', + 'ext': 'mp4', + 'title': 'Episode 1: The Draw', + 'description': r'contains:There’s free money on offer… if the players can all work together.', + 'upload_date': '20200323', + 'timestamp': 1584980400, + 'channel': 'Tom Scott Presents: Money', + 'channel_id': 'tom-scott-presents-money', + 'uploader': 'Tom Scott Presents: Money', + 'uploader_id': 'tom-scott-presents-money', + 'uploader_url': 'https://nebula.tv/tom-scott-presents-money', + 'duration': 825, + 'channel_url': 'https://nebula.tv/tom-scott-presents-money', + 'series': 'Tom Scott Presents: Money', + 'display_id': 'money-episode-1-the-draw', + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', + 'creator': 'Tom Scott Presents: Money', + }, + }, + { + 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw', + 'only_matching': True, + }, + ] + + def _fetch_video_metadata(self, slug): + return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/', + video_id=slug, + auth_type='bearer', + note='Fetching video meta data') + + def _real_extract(self, url): + slug = self._match_id(url) + video = self._fetch_video_metadata(slug) + return self._build_video_info(video) + + +class NebulaSubscriptionsIE(NebulaBaseIE): + IE_NAME = 'nebula:subscriptions' + _VALID_URL = rf'{_BASE_URL_RE}/myshows' + _TESTS = [ + { + 'url': 'https://nebula.tv/myshows', + 'playlist_mincount': 1, + 'info_dict': { + 'id': 'myshows', + }, + }, + ] + + def _generate_playlist_entries(self): + next_url = 'https://content.watchnebula.com/library/video/?page_size=100' + page_num = 1 + while next_url: + channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer', + note=f'Retrieving subscriptions page {page_num}') + for episode in channel['results']: + yield self._build_video_info(episode) + next_url = channel['next'] + page_num += 1 + + def _real_extract(self, url): + return self.playlist_result(self._generate_playlist_entries(), 'myshows') + + +class NebulaChannelIE(NebulaBaseIE): + IE_NAME = 'nebula:channel' + _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)' + _TESTS = [ + { + 'url': 'https://nebula.tv/tom-scott-presents-money', + 'info_dict': { + 'id': 'tom-scott-presents-money', + 'title': 'Tom Scott Presents: Money', + 'description': 'Tom Scott hosts a series all about trust, negotiation and money.', + }, + 'playlist_count': 5, + }, { + 'url': 'https://nebula.tv/lindsayellis', + 'info_dict': { + 'id': 'lindsayellis', + 'title': 'Lindsay Ellis', + 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.', + }, + 'playlist_mincount': 2, + }, + ] + + def _generate_playlist_entries(self, collection_id, channel): + episodes = channel['episodes']['results'] + for page_num in itertools.count(2): + for episode in episodes: + yield self._build_video_info(episode) + next_url = channel['episodes']['next'] + if not next_url: + break + channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer', + note=f'Retrieving channel page {page_num}') + episodes = channel['episodes']['results'] + + def _real_extract(self, url): + collection_id = self._match_id(url) + channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/' + channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel') + channel_details = channel['details'] + + return self.playlist_result( + entries=self._generate_playlist_entries(collection_id, channel), + playlist_id=collection_id, + playlist_title=channel_details['title'], + playlist_description=channel_details['description'] + ) |