summaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/common.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--yt_dlp/extractor/common.py69
1 files changed, 55 insertions, 14 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index e776cca..57bbf9b 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -37,6 +37,7 @@ from ..networking.exceptions import (
IncompleteRead,
network_exceptions,
)
+from ..networking.impersonate import ImpersonateTarget
from ..utils import (
IDENTITY,
JSON_LD_RE,
@@ -170,12 +171,12 @@ class InfoExtractor:
Automatically calculated from width and height
* dynamic_range The dynamic range of the video. One of:
"SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
- * tbr Average bitrate of audio and video in KBit/s
- * abr Average audio bitrate in KBit/s
+ * tbr Average bitrate of audio and video in kbps (1000 bits/sec)
+ * abr Average audio bitrate in kbps (1000 bits/sec)
* acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz
* audio_channels Number of audio channels
- * vbr Average video bitrate in KBit/s
+ * vbr Average video bitrate in kbps (1000 bits/sec)
* fps Frame rate
* vcodec Name of the video codec in use
* container Name of the container format
@@ -246,7 +247,8 @@ class InfoExtractor:
* downloader_options A dictionary of downloader options
(For internal use only)
* http_chunk_size Chunk size for HTTP downloads
- * ffmpeg_args Extra arguments for ffmpeg downloader
+ * ffmpeg_args Extra arguments for ffmpeg downloader (input)
+ * ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url,
@@ -817,7 +819,7 @@ class InfoExtractor:
else:
return err.status in variadic(expected_status)
- def _create_request(self, url_or_request, data=None, headers=None, query=None):
+ def _create_request(self, url_or_request, data=None, headers=None, query=None, extensions=None):
if isinstance(url_or_request, urllib.request.Request):
self._downloader.deprecation_warning(
'Passing a urllib.request.Request to _create_request() is deprecated. '
@@ -826,10 +828,11 @@ class InfoExtractor:
elif not isinstance(url_or_request, Request):
url_or_request = Request(url_or_request)
- url_or_request.update(data=data, headers=headers, query=query)
+ url_or_request.update(data=data, headers=headers, query=query, extensions=extensions)
return url_or_request
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None,
+ headers=None, query=None, expected_status=None, impersonate=None, require_impersonation=False):
"""
Return the response handle.
@@ -860,8 +863,31 @@ class InfoExtractor:
headers = (headers or {}).copy()
headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
+ extensions = {}
+
+ if impersonate in (True, ''):
+ impersonate = ImpersonateTarget()
+ requested_targets = [
+ t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
+ for t in variadic(impersonate)
+ ] if impersonate else []
+
+ available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
+ if available_target:
+ extensions['impersonate'] = available_target
+ elif requested_targets:
+ message = 'The extractor is attempting impersonation, but '
+ message += (
+ 'no impersonate target is available' if not str(impersonate)
+ else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
+ info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
+ 'for information on installing the required dependencies')
+ if require_impersonation:
+ raise ExtractorError(f'{message}; {info_msg}', expected=True)
+ self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
+
try:
- return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
+ return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))
except network_exceptions as err:
if isinstance(err, HTTPError):
if self.__can_accept_status_code(err, expected_status):
@@ -880,13 +906,14 @@ class InfoExtractor:
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
- encoding=None, data=None, headers={}, query={}, expected_status=None):
+ encoding=None, data=None, headers={}, query={}, expected_status=None,
+ impersonate=None, require_impersonation=False):
"""
Return a tuple (page content as string, URL handle).
Arguments:
url_or_request -- plain text URL as a string or
- a urllib.request.Request object
+ a yt_dlp.networking.Request object
video_id -- Video/playlist/item identifier (string)
Keyword arguments:
@@ -911,13 +938,22 @@ class InfoExtractor:
returning True if it should be accepted
Note that this argument does not affect success status codes (2xx)
which are always accepted.
+ impersonate -- the impersonate target. Can be any of the following entities:
+ - an instance of yt_dlp.networking.impersonate.ImpersonateTarget
+ - a string in the format of CLIENT[:OS]
+ - a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
+ - a boolean value; True means any impersonate target is sufficient
+ require_impersonation -- flag to toggle whether the request should raise an error
+ if impersonation is not possible (bool, default: False)
"""
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, str):
url_or_request = url_or_request.partition('#')[0]
- urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+ urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data,
+ headers=headers, query=query, expected_status=expected_status,
+ impersonate=impersonate, require_impersonation=require_impersonation)
if urlh is False:
assert not fatal
return False
@@ -1046,17 +1082,20 @@ class InfoExtractor:
return getattr(ie, parser)(content, *args, **kwargs)
def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+ impersonate=None, require_impersonation=False):
res = self._download_webpage_handle(
url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query, expected_status=expected_status)
+ data=data, headers=headers, query=query, expected_status=expected_status,
+ impersonate=impersonate, require_impersonation=require_impersonation)
if res is False:
return res
content, urlh = res
return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+ impersonate=None, require_impersonation=False):
if self.get_param('load_pages'):
url_or_request = self._create_request(url_or_request, data, headers, query)
filename = self._request_dump_filename(url_or_request.url, video_id)
@@ -1079,6 +1118,8 @@ class InfoExtractor:
'headers': headers,
'query': query,
'expected_status': expected_status,
+ 'impersonate': impersonate,
+ 'require_impersonation': require_impersonation,
}
if parser is None:
kwargs.pop('transform_source')