diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 17:37:42 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 17:37:42 +0000 |
commit | c7bab7c39fd51c0812f70020172766303191bc01 (patch) | |
tree | 56c05fbdd4fc47409d48ba318a4b621a7b0d299a /yt_dlp/extractor/techtalks.py | |
parent | Initial commit. (diff) | |
download | yt-dlp-c7bab7c39fd51c0812f70020172766303191bc01.tar.xz yt-dlp-c7bab7c39fd51c0812f70020172766303191bc01.zip |
Adding upstream version 2023.03.04.upstream/2023.03.04upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/techtalks.py')
-rw-r--r-- | yt_dlp/extractor/techtalks.py | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/yt_dlp/extractor/techtalks.py b/yt_dlp/extractor/techtalks.py new file mode 100644 index 0000000..d37de36 --- /dev/null +++ b/yt_dlp/extractor/techtalks.py @@ -0,0 +1,80 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + get_element_by_attribute, + clean_html, +) + + +class TechTalksIE(InfoExtractor): + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', + 'info_dict': { + 'id': '57758', + 'title': 'Learning Topic Models --- Going beyond SVD', + }, + 'playlist': [ + { + 'info_dict': { + 'id': '57758', + 'ext': 'flv', + 'title': 'Learning Topic Models --- Going beyond SVD', + }, + }, + { + 'info_dict': { + 'id': '57758-slides', + 'ext': 'flv', + 'title': 'Learning Topic Models --- Going beyond SVD', + }, + }, + ], + 'params': { + # rtmp download + 'skip_download': True, + }, + }, { + 'url': 'http://techtalks.tv/talks/57758', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + talk_id = mobj.group('id') + webpage = self._download_webpage(url, talk_id) + rtmp_url = self._search_regex( + r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') + play_path = self._search_regex( + r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', + webpage, 'presenter play path') + title = clean_html(get_element_by_attribute('class', 'title', webpage)) + video_info = { + 'id': talk_id, + 'title': title, + 'url': rtmp_url, + 'play_path': play_path, + 'ext': 'flv', + } + m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage) + if m_slides is None: + return video_info + else: + return { + '_type': 'playlist', + 'id': talk_id, + 'title': title, + 'entries': [ + video_info, + # The slides video + { + 'id': talk_id + '-slides', + 'title': title, + 'url': rtmp_url, + 'play_path': m_slides.group(1), + 'ext': 'flv', + }, + ], + } |