From 2415e66f889f38503b73e8ebc5f43ca342390e5c Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Mon, 15 Apr 2024 18:49:24 +0200
Subject: Adding upstream version 2024.03.10.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 yt_dlp/extractor/googlesearch.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 yt_dlp/extractor/googlesearch.py

(limited to 'yt_dlp/extractor/googlesearch.py')

diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py
new file mode 100644
index 0000000..67ca0e5
--- /dev/null
+++ b/yt_dlp/extractor/googlesearch.py
@@ -0,0 +1,38 @@
+import itertools
+import re
+
+from .common import SearchInfoExtractor
+
+
+class GoogleSearchIE(SearchInfoExtractor):
+    """Google Video search extractor (search key ``gvsearch``)."""
+
+    IE_NAME = 'video.google:search'
+    IE_DESC = 'Google Video search'
+    _SEARCH_KEY = 'gvsearch'
+    _PAGE_SIZE = 100  # sent as "num" and used as the stride of the "start" offset
+    _TESTS = [{
+        'url': 'gvsearch15:python language',
+        'info_dict': {'id': 'python language', 'title': 'python language'},
+        'playlist_count': 15,
+    }]
+
+    def _search_results(self, query):
+        """Yield a url_result() entry for every result link found, page after page."""
+        for page_no in itertools.count(1):
+            # Query string for one page: "start" is the offset, "num" the page length.
+            params = {
+                'tbm': 'vid',
+                'q': query,
+                'start': (page_no - 1) * self._PAGE_SIZE,
+                'num': self._PAGE_SIZE,
+                'hl': 'en',
+            }
+            html = self._download_webpage(
+                'http://www.google.com/search', f'gvsearch:{query}',
+                note=f'Downloading result page {page_no}', query=params)
+            for link in re.findall(r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', html):
+                yield self.url_result(link)
+            # Stop once the page has no id="pnnext" element (presumably the "next" link).
+            if re.search(r'id="pnnext"', html) is None:
+                return
-- 
cgit v1.2.3