diff options
Diffstat (limited to 'yt_dlp/extractor/joj.py')
-rw-r--r-- | yt_dlp/extractor/joj.py | 108 |
1 files changed, 108 insertions, 0 deletions
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    format_field,
    int_or_none,
    js_to_json,
    try_get,
)


class JojIE(InfoExtractor):
    """Extractor for videos served through the media.joj.sk embed player.

    Matches either a ``joj:<id>`` pseudo-URL or a
    ``https://media.joj.sk/embed/<id>`` URL.
    """

    # Verbose-mode pattern: whitespace outside the character class is ignored.
    _VALID_URL = r'''(?x)
        (?:
            joj:|
            https?://media\.joj\.sk/embed/
        )
        (?P<id>[^/?#^]+)
    '''
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1']
    _TESTS = [
        {
            'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
            'info_dict': {
                'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
                'ext': 'mp4',
                'title': 'NOVÉ BÝVANIE',
                'thumbnail': r're:^https?://.*?$',
                'duration': 3118,
            },
        },
        {
            'url': 'https://media.joj.sk/embed/CSM0Na0l0p1',
            'info_dict': {
                'id': 'CSM0Na0l0p1',
                'ext': 'mp4',
                'height': 576,
                'title': 'Extrémne rodiny 2 - POKRAČOVANIE (2012/04/09 21:30:00)',
                'duration': 3937,
                'thumbnail': r're:^https?://.*?$',
            },
        },
        {
            'url': 'https://media.joj.sk/embed/9i1cxv',
            'only_matching': True,
        },
        {
            'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
            'only_matching': True,
        },
        {
            'url': 'joj:9i1cxv',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, duration, formats) for *url*.

        Formats are taken from the ``src``/``bitrates`` object embedded in the
        player page; when that yields no usable entries, the ``Video.php`` XML
        service is queried instead.
        """
        video_id = self._match_id(url)

        # The embed page is always fetched, even for ``joj:<id>`` inputs.
        webpage = self._download_webpage(
            'https://media.joj.sk/embed/%s' % video_id, video_id)

        # Title sources are tried in order; the first two pass default=None so
        # that a miss falls through to the next source.
        title = self._search_json(
            r'videoTitle\s*:', webpage, 'title', video_id,
            contains_pattern=r'["\'].+["\']', default=None)
        if not title:
            title = self._html_extract_title(webpage, default=None)
        if not title:
            title = self._og_search_title(webpage)

        # The player config is a JavaScript object literal, hence js_to_json.
        raw_bitrates = self._search_regex(
            r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
            default='{}')
        bitrates = self._parse_json(
            raw_bitrates, video_id, transform_source=js_to_json, fatal=False)

        formats = []
        mp4_urls = try_get(bitrates, lambda data: data['mp4'], list) or []
        for format_url in mp4_urls:
            if not isinstance(format_url, compat_str):
                continue
            height = self._search_regex(
                r'(\d+)[pP]|(pal)\.', format_url, 'height', default=None)
            # URLs tagged "pal." carry no numeric height; map them to 576.
            if height == 'pal':
                height = 576
            formats.append({
                'url': format_url,
                'format_id': format_field(height, None, '%sp'),
                'height': int_or_none(height),
            })

        if not formats:
            # Fallback: list the clip's files through the XML service.
            playlist = self._download_xml(
                'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
                video_id)
            for file_el in playlist.findall('./files/file'):
                path = file_el.get('path')
                if not path:
                    continue
                format_id = file_el.get('id') or file_el.get('label')
                # Only the first "dat/" occurrence is dropped from the path.
                storage_path = path.replace('dat/', '', 1)
                height = self._search_regex(
                    r'(\d+)[pP]', format_id or path, 'height', default=None)
                formats.append({
                    'url': 'http://n16.joj.sk/storage/%s' % storage_path,
                    'format_id': format_id,
                    'height': int_or_none(height),
                })

        # Dict values are evaluated top to bottom, so the thumbnail lookup
        # still runs before the duration lookup.
        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': int_or_none(self._search_regex(
                r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)),
            'formats': formats,
        }