diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:10:23 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 03:10:23 +0000 |
commit | 96011c9a0b8a4f10e6e06f76acc380d6a802bbee (patch) | |
tree | 30ded50e296e5d936800d19ada594982f10111d6 /yt_dlp/extractor/lci.py | |
parent | Adding debian version 2024.04.09-1. (diff) | |
download | yt-dlp-96011c9a0b8a4f10e6e06f76acc380d6a802bbee.tar.xz yt-dlp-96011c9a0b8a4f10e6e06f76acc380d6a802bbee.zip |
Merging upstream version 2024.05.26.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/lci.py')
-rw-r--r-- | yt_dlp/extractor/lci.py | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py index e7d2f8a..708cb54 100644 --- a/yt_dlp/extractor/lci.py +++ b/yt_dlp/extractor/lci.py @@ -1,9 +1,25 @@ from .common import InfoExtractor +from .wat import WatIE +from ..utils import ExtractorError, int_or_none +from ..utils.traversal import traverse_obj class LCIIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html' _TESTS = [{ + 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html', + 'info_dict': { + 'id': '14113788', + 'ext': 'mp4', + 'title': '24H Pujadas du vendredi 24 mai 2024', + 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg', + 'upload_date': '20240524', + 'duration': 6158, + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', 'info_dict': { 'id': '13875948', @@ -24,5 +40,10 @@ class LCIIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id') - return self.url_result('wat:' + wat_id, 'Wat', wat_id) + next_data = self._search_nextjs_data(webpage, video_id) + wat_id = traverse_obj(next_data, ( + 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any)) + if wat_id is None: + raise ExtractorError('Could not find wat_id') + + return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id)) |