summaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/amp.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/amp.py')
-rw-r--r--yt_dlp/extractor/amp.py98
1 files changed, 98 insertions, 0 deletions
diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py
new file mode 100644
index 0000000..b0cbd77
--- /dev/null
+++ b/yt_dlp/extractor/amp.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
+    """Extractor base that parses Akamai Adaptive Media Player (AMP) JSON feeds."""
+
+    def _extract_feed_info(self, url):
+        """Download an Akamai AMP feed and convert its channel item to an info dict.
+
+        Args:
+            url: Location of the JSON feed.
+
+        Returns:
+            A dict with the keys ``id``, ``title``, ``description``,
+            ``thumbnails``, ``timestamp``, ``duration``, ``subtitles`` and
+            ``formats``.
+
+        Raises:
+            ExtractorError: If the feed carries no channel item. The feed's
+                own ``error`` value is included in the message when present.
+        """
+        feed = self._download_json(
+            url, None, 'Downloading Akamai AMP feed',
+            'Unable to download Akamai AMP feed')
+        # `channel` may be missing or null; treat both as "no item".
+        item = (feed.get('channel') or {}).get('item')
+        if not item:
+            error = feed.get('error')
+            if error:
+                raise ExtractorError('%s said: %s' % (self.IE_NAME, error))
+            # No item and no error message: still fail with ExtractorError
+            # instead of leaking a KeyError.
+            raise ExtractorError(
+                '%s feed does not contain any item' % self.IE_NAME)
+
+        video_id = item['guid']
+
+        def get_media_node(name, default=None):
+            # Look for `media-<name>` in the media group first, then on the
+            # item itself, and finally fall back to the bare `<name>` key.
+            media_name = 'media-%s' % name
+            media_group = item.get('media-group') or item
+            return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
+
+        def as_list(node):
+            # A node is either absent, a single dict, or a list of dicts;
+            # normalise all three shapes to something iterable.
+            if not node:
+                return []
+            return [node] if isinstance(node, dict) else node
+
+        thumbnails = []
+        for thumbnail_data in as_list(get_media_node('thumbnail')):
+            thumbnail = thumbnail_data.get('@attributes', {})
+            thumbnail_url = url_or_none(thumbnail.get('url'))
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': self._proto_relative_url(thumbnail_url, 'http:'),
+                'width': int_or_none(thumbnail.get('width')),
+                'height': int_or_none(thumbnail.get('height')),
+            })
+
+        subtitles = {}
+        for subtitle_data in as_list(get_media_node('subTitle')):
+            subtitle = subtitle_data.get('@attributes', {})
+            subtitle_href = url_or_none(subtitle.get('href'))
+            if not subtitle_href:
+                continue
+            # Subtitles without a language tag are filed under 'en'.
+            subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
+                'url': subtitle_href,
+                'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
+            })
+
+        formats = []
+        # Normalised up front so that a feed without any content node yields
+        # an empty format list rather than a TypeError while iterating None.
+        media_content = as_list(get_media_node('content'))
+        for media_data in media_content:
+            media = media_data.get('@attributes', {})
+            media_url = url_or_none(media.get('url'))
+            if not media_url:
+                continue
+            ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
+            if ext == 'f4m':
+                # NOTE(review): the hdcore/plugin query string is presumably
+                # required by the Akamai HDS endpoint - confirm.
+                formats.extend(self._extract_f4m_formats(
+                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
+                    video_id, f4m_id='hds', fatal=False))
+            elif ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            else:
+                formats.append({
+                    'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
+                    'url': media_url,
+                    'tbr': int_or_none(media.get('bitrate')),
+                    'filesize': int_or_none(media.get('fileSize')),
+                    'ext': ext,
+                })
+
+        # NOTE(review): the second positional argument ' ' looks like a
+        # delimiter intended for parse_iso8601 - confirm it matches the
+        # signature of unified_timestamp.
+        timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+
+        # The duration is read from the first content entry only; guard
+        # against an empty content list.
+        first_media = media_content[0] if media_content else {}
+
+        return {
+            'id': video_id,
+            'title': get_media_node('title'),
+            'description': get_media_node('description'),
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'duration': int_or_none(first_media.get('@attributes', {}).get('duration')),
+            'subtitles': subtitles,
+            'formats': formats,
+        }