summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/lci.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:10:22 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:10:22 +0000
commit: bb3682b5a9a4d0e8e45f74de8c21dba3d5e6e0ab (patch)
tree: d7890656a89a7d2f3497a5793dd65aa746f7cabd /yt_dlp/extractor/lci.py
parent: Adding upstream version 2024.04.09. (diff)
download: yt-dlp-4191033c6e2eeef4fd4eae7175a1d8423430c386.tar.xz
download: yt-dlp-4191033c6e2eeef4fd4eae7175a1d8423430c386.zip
Adding upstream version 2024.05.26. (tag: upstream/2024.05.26)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/lci.py')
-rw-r--r-- yt_dlp/extractor/lci.py | 27
1 files changed, 24 insertions, 3 deletions
diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py
index e7d2f8a..708cb54 100644
--- a/yt_dlp/extractor/lci.py
+++ b/yt_dlp/extractor/lci.py
@@ -1,9 +1,25 @@
from .common import InfoExtractor
+from .wat import WatIE
+from ..utils import ExtractorError, int_or_none
+from ..utils.traversal import traverse_obj
class LCIIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html'
_TESTS = [{
+ 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html',
+ 'info_dict': {
+ 'id': '14113788',
+ 'ext': 'mp4',
+ 'title': '24H Pujadas du vendredi 24 mai 2024',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg',
+ 'upload_date': '20240524',
+ 'duration': 6158,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
'info_dict': {
'id': '13875948',
@@ -24,5 +40,10 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
- return self.url_result('wat:' + wat_id, 'Wat', wat_id)
+ next_data = self._search_nextjs_data(webpage, video_id)
+ wat_id = traverse_obj(next_data, (
+ 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any))
+ if wat_id is None:
+ raise ExtractorError('Could not find wat_id')
+
+ return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id))