diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 16:49:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 16:49:24 +0000 |
commit | 2415e66f889f38503b73e8ebc5f43ca342390e5c (patch) | |
tree | ac48ab69d1d96bae3d83756134921e0d90593aa5 /yt_dlp/extractor/vuclip.py | |
parent | Initial commit. (diff) | |
download | yt-dlp-2415e66f889f38503b73e8ebc5f43ca342390e5c.tar.xz yt-dlp-2415e66f889f38503b73e8ebc5f43ca342390e5c.zip |
Adding upstream version 2024.03.10. (tag: upstream/2024.03.10)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/vuclip.py')
-rw-r--r-- | yt_dlp/extractor/vuclip.py | 68 |
1 files changed, 68 insertions, 0 deletions
import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse_urlparse,
)
from ..utils import (
    ExtractorError,
    parse_duration,
    remove_end,
)


class VuClipIE(InfoExtractor):
    """Information extractor for vuclip.com (and m.vuclip.com) watch pages."""

    # The numeric ``cid`` query parameter is used as the video id.
    _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247&section=recommend',
        'info_dict': {
            'id': '1129900602',
            'ext': '3gp',
            'title': 'Top 10 TV Convicts',
            'duration': 733,
        },
    }

    def _real_extract(self, url):
        """Extract id, formats, title and duration from a watch page.

        Downloads the page at *url*; when the page offers a "No..." button
        that redirects elsewhere, the redirect target is downloaded and used
        instead (the post-ad page).

        Raises:
            ExtractorError: if the page carries a ``<p class="message">``
                error text (expected error), or if neither a play-button
                link nor an HTML5 media entry can be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Look for the "No..." button whose onClick handler points at the
        # page served after the ad prompt.
        ad_m = re.search(
            r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
        if ad_m:
            # The captured target is appended to the scheme and host of the
            # requested URL, i.e. it is treated as a host-relative path.
            urlr = compat_urllib_parse_urlparse(url)
            adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
            webpage = self._download_webpage(
                adfree_url, video_id, note='Download post-ad page')

        error_msg = self._html_search_regex(
            r'<p class="message">(.*?)</p>', webpage, 'error message',
            default=None)
        if error_msg:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error_msg), expected=True)

        # The site alternates between two page types: one links the video
        # from a play.gif button, the other embeds HTML5 media elements.
        video_url = self._search_regex(
            r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
            webpage, 'video URL', default=None)
        if video_url:
            formats = [{
                'url': video_url,
            }]
        else:
            entries = self._parse_html5_media_entries(url, webpage, video_id)
            if not entries:
                # Fail with a descriptive extractor error instead of an
                # IndexError when the page matches neither layout.
                raise ExtractorError(
                    'Unable to find a video URL or an HTML5 media entry')
            formats = entries[0]['formats']

        title = remove_end(self._html_search_regex(
            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')

        # Duration is optional (fatal=False); the regex matches an "M:SS"
        # style value that follows "(" or ">".
        duration = parse_duration(self._html_search_regex(
            r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'duration': duration,
        }