From b12aca2eea3676bb3cf2666b63542c019fac699d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 09:54:09 +0200 Subject: Merging upstream version 2024.04.09. Signed-off-by: Daniel Baumann --- test/test_YoutubeDL.py | 4 +- test/test_cookies.py | 4 +- test/test_networking.py | 632 ++++++++++++++++++++++++++++++++++++++---------- test/test_socks.py | 33 ++- test/test_traversal.py | 444 ++++++++++++++++++++++++++++++++++ test/test_utils.py | 383 +---------------------------- test/test_websockets.py | 53 ++-- 7 files changed, 1004 insertions(+), 549 deletions(-) create mode 100644 test/test_traversal.py (limited to 'test') diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 6be47af..5242cf8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -183,7 +183,7 @@ class TestFormatSelection(unittest.TestCase): ] info_dict = _make_result(formats) - ydl = YDL({'format': 'best'}) + ydl = YDL({'format': 'best', 'format_sort': ['abr', 'ext']}) ydl.sort_formats(info_dict) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] @@ -195,7 +195,7 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'mp3-64') - ydl = YDL({'prefer_free_formats': True}) + ydl = YDL({'prefer_free_formats': True, 'format_sort': ['abr', 'ext']}) ydl.sort_formats(info_dict) ydl.process_ie_result(copy.deepcopy(info_dict)) downloaded = ydl.downloaded_info_dicts[0] diff --git a/test/test_cookies.py b/test/test_cookies.py index 5282ef6..bd61f30 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -1,5 +1,5 @@ +import datetime as dt import unittest -from datetime import datetime, timezone from yt_dlp import cookies from yt_dlp.cookies import ( @@ -138,7 +138,7 @@ class TestCookies(unittest.TestCase): self.assertEqual(cookie.name, 'foo') self.assertEqual(cookie.value, 'test%20%3Bcookie') self.assertFalse(cookie.secure) - expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc) + expected_expiration = dt.datetime(2021, 6, 18, 21, 39, 19, tzinfo=dt.timezone.utc) self.assertEqual(cookie.expires, int(expected_expiration.timestamp())) def test_pbkdf2_sha1(self): diff --git a/test/test_networking.py b/test/test_networking.py index 628f1f1..b50f70d 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -27,9 +27,10 @@ import zlib from email.message import Message from http.cookiejar import CookieJar +from test.conftest import validate_and_send from test.helper import FakeYDL, http_server_port, verify_address_availability from yt_dlp.cookies import YoutubeDLCookieJar -from yt_dlp.dependencies import brotli, requests, urllib3 +from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.networking import ( HEADRequest, PUTRequest, @@ -50,10 +51,13 @@ from yt_dlp.networking.exceptions import ( TransportError, UnsupportedRequest, ) +from yt_dlp.networking.impersonate import ( + ImpersonateRequestHandler, + ImpersonateTarget, +) +from yt_dlp.utils import YoutubeDLError from yt_dlp.utils._utils import _YDLLogger as FakeLogger -from yt_dlp.utils.networking import HTTPHeaderDict - -from test.conftest import validate_and_send +from yt_dlp.utils.networking import HTTPHeaderDict, std_headers TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -75,6 +79,7 @@ def _build_proxy_handler(name): class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): protocol_version = 'HTTP/1.1' + default_request_version = 'HTTP/1.1' def log_message(self, format, *args): pass @@ -112,6 +117,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): def _read_data(self): if 'Content-Length' in self.headers: return self.rfile.read(int(self.headers['Content-Length'])) + else: + return b'' def do_POST(self): data = self._read_data() + str(self.headers).encode() @@ -195,7 +202,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): self._headers() elif self.path.startswith('/308-to-headers'): self.send_response(308) - self.send_header('Location', '/headers') + # redirect to "localhost" for testing cookie redirection handling + self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers') self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/trailing_garbage': @@ -310,7 +318,7 @@ class TestRequestHandlerBase: class TestHTTPRequestHandler(TestRequestHandlerBase): - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): @@ -321,7 +329,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.status == 200 r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_ssl_error(self, handler): # HTTPS server with too old TLS version # XXX: is there a better way to test this than to create a new server? @@ -335,11 +343,11 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): https_server_thread.start() with handler(verify=False) as rh: - with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: + with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info: validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) assert not issubclass(exc_info.type, CertificateVerifyError) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_percent_encode(self, handler): with handler() as rh: # Unicode characters should be encoded with uppercase percent-encoding @@ -351,7 +359,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.status == 200 res.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) @pytest.mark.parametrize('path', [ '/a/b/./../../headers', '/redirect_dotsegments', @@ -367,6 +375,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.url == f'http://127.0.0.1:{self.http_port}/headers' res.close() + # Not supported by CurlCFFI (non-standard) @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) def test_unicode_path_redirection(self, handler): with handler() as rh: @@ -374,7 +383,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_raise_http_error(self, handler): with handler() as rh: for bad_status in (400, 500, 599, 302): @@ -384,7 +393,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): # Should not raise an error validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_response_url(self, handler): with handler() as rh: # Response url should be that of the last url in redirect chain @@ -395,62 +404,50 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200' res2.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) - def test_redirect(self, handler): + # Covers some basic cases we expect some level of consistency between request handlers for + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + @pytest.mark.parametrize('redirect_status,method,expected', [ + # A 303 must either use GET or HEAD for subsequent request + (303, 'POST', ('', 'GET', False)), + (303, 'HEAD', ('', 'HEAD', False)), + + # 301 and 302 turn POST only into a GET + (301, 'POST', ('', 'GET', False)), + (301, 'HEAD', ('', 'HEAD', False)), + (302, 'POST', ('', 'GET', False)), + (302, 'HEAD', ('', 'HEAD', False)), + + # 307 and 308 should not change method + (307, 'POST', ('testdata', 'POST', True)), + (308, 'POST', ('testdata', 'POST', True)), + (307, 'HEAD', ('', 'HEAD', False)), + (308, 'HEAD', ('', 'HEAD', False)), + ]) + def test_redirect(self, handler, redirect_status, method, expected): with handler() as rh: - def do_req(redirect_status, method, assert_no_content=False): - data = b'testdata' if method in ('POST', 'PUT') else None - res = validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data)) - - headers = b'' - data_sent = b'' - if data is not None: - data_sent += res.read(len(data)) - if data_sent != data: - headers += data_sent - data_sent = b'' - - headers += res.read() - - if assert_no_content or data is None: - assert b'Content-Type' not in headers - assert b'Content-Length' not in headers - else: - assert b'Content-Type' in headers - assert b'Content-Length' in headers - - return data_sent.decode(), res.headers.get('method', '') - - # A 303 must either use GET or HEAD for subsequent request - assert do_req(303, 'POST', True) == ('', 'GET') - assert do_req(303, 'HEAD') == ('', 'HEAD') - - assert do_req(303, 'PUT', True) == ('', 'GET') - - # 301 and 302 turn POST only into a GET - assert do_req(301, 'POST', True) == ('', 'GET') - assert do_req(301, 'HEAD') == ('', 'HEAD') - assert do_req(302, 'POST', True) == ('', 'GET') - assert do_req(302, 'HEAD') == ('', 'HEAD') - - assert do_req(301, 'PUT') == ('testdata', 'PUT') - assert do_req(302, 'PUT') == ('testdata', 'PUT') + data = b'testdata' if method == 'POST' else None + headers = {} + if data is not None: + headers['Content-Type'] = 'application/test' + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data, + headers=headers)) - # 307 and 308 should not change method - for m in ('POST', 'PUT'): - assert do_req(307, m) == ('testdata', m) - assert do_req(308, m) == ('testdata', m) + headers = b'' + data_recv = b'' + if data is not None: + data_recv += res.read(len(data)) + if data_recv != data: + headers += data_recv + data_recv = b'' - assert do_req(307, 'HEAD') == ('', 'HEAD') - assert do_req(308, 'HEAD') == ('', 'HEAD') + headers += res.read() - # These should not redirect and instead raise an HTTPError - for code in (300, 304, 305, 306): - with pytest.raises(HTTPError): - do_req(code, 'GET') + assert expected[0] == data_recv.decode() + assert expected[1] == res.headers.get('method') + assert expected[2] == ('content-length' in headers.decode().lower()) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_request_cookie_header(self, handler): # We should accept a Cookie header being passed as in normal headers and handle it appropriately. with handler() as rh: @@ -459,16 +456,17 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request( f'http://127.0.0.1:{self.http_port}/headers', headers={'Cookie': 'test=test'})).read().decode() - assert 'Cookie: test=test' in res + assert 'cookie: test=test' in res.lower() # Specified Cookie header should be removed on any redirect res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/308-to-headers', - headers={'Cookie': 'test=test'})).read().decode() - assert 'Cookie: test=test' not in res + headers={'Cookie': 'test=test2'})).read().decode() + assert 'cookie: test=test2' not in res.lower() # Specified Cookie header should override global cookiejar for that request + # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( version=0, name='test', value='ytdlp', port=None, port_specified=False, @@ -478,23 +476,23 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): with handler(cookiejar=cookiejar) as rh: data = validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read() - assert b'Cookie: test=ytdlp' not in data - assert b'Cookie: test=test' in data + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read() + assert b'cookie: test=ytdlp' not in data.lower() + assert b'cookie: test=test3' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_redirect_loop(self, handler): with handler() as rh: with pytest.raises(HTTPError, match='redirect loop'): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_incompleteread(self, handler): with handler(timeout=2) as rh: - with pytest.raises(IncompleteRead): + with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( @@ -503,47 +501,66 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): with handler(cookiejar=cookiejar) as rh: data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() - assert b'Cookie: test=ytdlp' in data + assert b'cookie: test=ytdlp' in data.lower() # Per request with handler() as rh: data = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() - assert b'Cookie: test=ytdlp' in data + assert b'cookie: test=ytdlp' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: # Global Headers - data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() - assert b'Test1: test' in data + data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower() + assert b'test1: test' in data # Per request headers, merged with global data = validate_and_send(rh, Request( - f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read() - assert b'Test1: test' in data - assert b'Test2: changed' in data - assert b'Test2: test2' not in data - assert b'Test3: test3' in data - - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) - def test_timeout(self, handler): + f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower() + assert b'test1: test' in data + assert b'test2: changed' in data + assert b'test2: test2' not in data + assert b'test3: test3' in data + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_read_timeout(self, handler): with handler() as rh: # Default timeout is 20 seconds, so this should go through validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3')) + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) - with handler(timeout=0.5) as rh: + with handler(timeout=0.1) as rh: with pytest.raises(TransportError): validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5')) # Per request timeout, should override handler timeout validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_connect_timeout(self, handler): + # nothing should be listening on this port + connect_timeout_url = 'http://10.255.255.255' + with handler(timeout=0.01) as rh: + now = time.time() + with pytest.raises(TransportError): + validate_and_send( + rh, Request(connect_timeout_url)) + assert 0.01 <= time.time() - now < 20 + + with handler() as rh: + with pytest.raises(TransportError): + # Per request timeout, should override handler timeout + now = time.time() + validate_and_send( + rh, Request(connect_timeout_url, extensions={'timeout': 0.01})) + assert 0.01 <= time.time() - now < 20 + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_source_address(self, handler): source_address = f'127.0.0.{random.randint(5, 255)}' # on some systems these loopback addresses we need for testing may not be available @@ -554,6 +571,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode() assert source_address == data + # Not supported by CurlCFFI @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) def test_gzip_trailing_garbage(self, handler): with handler() as rh: @@ -571,7 +589,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.headers.get('Content-Encoding') == 'br' assert res.read() == b'