diff options
Diffstat (limited to 'test/test_http_proxy.py')
-rw-r--r-- | test/test_http_proxy.py | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py new file mode 100644 index 0000000..1b21fe7 --- /dev/null +++ b/test/test_http_proxy.py @@ -0,0 +1,380 @@ +import abc +import base64 +import contextlib +import functools +import json +import os +import random +import ssl +import threading +from http.server import BaseHTTPRequestHandler +from socketserver import ThreadingTCPServer + +import pytest + +from test.helper import http_server_port, verify_address_availability +from test.test_networking import TEST_DIR +from test.test_socks import IPv6ThreadingTCPServer +from yt_dlp.dependencies import urllib3 +from yt_dlp.networking import Request +from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError + + +class HTTPProxyAuthMixin: + + def proxy_auth_error(self): + self.send_response(407) + self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"') + self.end_headers() + return False + + def do_proxy_auth(self, username, password): + if username is None and password is None: + return True + + proxy_auth_header = self.headers.get('Proxy-Authorization', None) + if proxy_auth_header is None: + return self.proxy_auth_error() + + if not proxy_auth_header.startswith('Basic '): + return self.proxy_auth_error() + + auth = proxy_auth_header[6:] + + try: + auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1) + except Exception: + return self.proxy_auth_error() + + if auth_username != (username or '') or auth_password != (password or ''): + return self.proxy_auth_error() + return True + + +class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin): + def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs): + self.username = username + self.password = password + self.proxy_info = proxy_info + super().__init__(*args, **kwargs) + + def do_GET(self): + if not self.do_proxy_auth(self.username, self.password): + self.server.close_request(self.request) + return + if self.path.endswith('/proxy_info'): + payload = json.dumps(self.proxy_info or { + 'client_address': self.client_address, + 'connect': False, + 'connect_host': None, + 'connect_port': None, + 'headers': dict(self.headers), + 'path': self.path, + 'proxy': ':'.join(str(y) for y in self.connection.getsockname()), + }) + self.send_response(200) + self.send_header('Content-Type', 'application/json; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload.encode()) + else: + self.send_response(404) + self.end_headers() + + self.server.close_request(self.request) + + +if urllib3: + import urllib3.util.ssltransport + + class SSLTransport(urllib3.util.ssltransport.SSLTransport): + """ + Modified version of urllib3 SSLTransport to support server side SSL + + This allows us to chain multiple TLS connections. + """ + + def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False): + self.incoming = ssl.MemoryBIO() + self.outgoing = ssl.MemoryBIO() + + self.suppress_ragged_eofs = suppress_ragged_eofs + self.socket = socket + + self.sslobj = ssl_context.wrap_bio( + self.incoming, + self.outgoing, + server_hostname=server_hostname, + server_side=server_side + ) + self._ssl_io_loop(self.sslobj.do_handshake) + + @property + def _io_refs(self): + return self.socket._io_refs + + @_io_refs.setter + def _io_refs(self, value): + self.socket._io_refs = value + + def shutdown(self, *args, **kwargs): + self.socket.shutdown(*args, **kwargs) +else: + SSLTransport = None + + +class HTTPSProxyHandler(HTTPProxyHandler): + def __init__(self, request, *args, **kwargs): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + if isinstance(request, ssl.SSLSocket): + request = SSLTransport(request, ssl_context=sslctx, server_side=True) + else: + request = sslctx.wrap_socket(request, server_side=True) + super().__init__(request, *args, **kwargs) + + +class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin): + protocol_version = 'HTTP/1.1' + default_request_version = 'HTTP/1.1' + + def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs): + self.username = username + self.password = password + self.request_handler = request_handler + super().__init__(*args, **kwargs) + + def do_CONNECT(self): + if not self.do_proxy_auth(self.username, self.password): + self.server.close_request(self.request) + return + self.send_response(200) + self.end_headers() + proxy_info = { + 'client_address': self.client_address, + 'connect': True, + 'connect_host': self.path.split(':')[0], + 'connect_port': int(self.path.split(':')[1]), + 'headers': dict(self.headers), + 'path': self.path, + 'proxy': ':'.join(str(y) for y in self.connection.getsockname()), + } + self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info) + self.server.close_request(self.request) + + +class HTTPSConnectProxyHandler(HTTPConnectProxyHandler): + def __init__(self, request, *args, **kwargs): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + request = sslctx.wrap_socket(request, server_side=True) + self._original_request = request + super().__init__(request, *args, **kwargs) + + def do_CONNECT(self): + super().do_CONNECT() + self.server.close_request(self._original_request) + + +@contextlib.contextmanager +def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs): + server = server_thread = None + try: + bind_address = bind_ip or '127.0.0.1' + server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer + server = server_type( + (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs)) + server_port = http_server_port(server) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + if '.' not in bind_address: + yield f'[{bind_address}]:{server_port}' + else: + yield f'{bind_address}:{server_port}' + finally: + server.shutdown() + server.server_close() + server_thread.join(2.0) + + +class HTTPProxyTestContext(abc.ABC): + REQUEST_HANDLER_CLASS = None + REQUEST_PROTO = None + + def http_server(self, server_class, *args, **kwargs): + return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs) + + @abc.abstractmethod + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict: + """return a dict of proxy_info""" + + +class HTTPProxyHTTPTestContext(HTTPProxyTestContext): + # Standard HTTP Proxy for http requests + REQUEST_HANDLER_CLASS = HTTPProxyHandler + REQUEST_PROTO = 'http' + + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs) + handler.validate(request) + return json.loads(handler.send(request).read().decode()) + + +class HTTPProxyHTTPSTestContext(HTTPProxyTestContext): + # HTTP Connect proxy, for https requests + REQUEST_HANDLER_CLASS = HTTPSProxyHandler + REQUEST_PROTO = 'https' + + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs) + handler.validate(request) + return json.loads(handler.send(request).read().decode()) + + +CTX_MAP = { + 'http': HTTPProxyHTTPTestContext, + 'https': HTTPProxyHTTPSTestContext, +} + + +@pytest.fixture(scope='module') +def ctx(request): + return CTX_MAP[request.param]() + + +@pytest.mark.parametrize( + 'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) +@pytest.mark.parametrize('ctx', ['http'], indirect=True) # pure http proxy can only support http +class TestHTTPProxy: + def test_http_no_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is False + assert 'Proxy-Authorization' not in proxy_info['headers'] + + def test_http_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert 'Proxy-Authorization' in proxy_info['headers'] + + def test_http_bad_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh: + with pytest.raises(HTTPError) as exc_info: + ctx.proxy_info_request(rh) + assert exc_info.value.response.status == 407 + exc_info.value.response.close() + + def test_http_source_address(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}, + source_address=source_address) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['client_address'][0] == source_address + + @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies') + def test_https(self, handler, ctx): + with ctx.http_server(HTTPSProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is False + assert 'Proxy-Authorization' not in proxy_info['headers'] + + @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies') + def test_https_verify_failed(self, handler, ctx): + with ctx.http_server(HTTPSProxyHandler) as server_address: + with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + # Accept SSLError as may not be feasible to tell if it is proxy or request error. + # note: if request proto also does ssl verification, this may also be the error of the request. + # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases. + with pytest.raises((ProxyError, SSLError)): + ctx.proxy_info_request(rh) + + def test_http_with_idn(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw') + assert proxy_info['proxy'] == server_address + assert proxy_info['path'].startswith('http://xn--fiq228c.tw') + assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw' + + +@pytest.mark.parametrize( + 'handler,ctx', [ + ('Requests', 'https'), + ('CurlCFFI', 'https'), + ], indirect=True) +class TestHTTPConnectProxy: + def test_http_connect_no_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is True + assert 'Proxy-Authorization' not in proxy_info['headers'] + + def test_http_connect_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert 'Proxy-Authorization' in proxy_info['headers'] + + @pytest.mark.skip_handler( + 'Requests', + 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374' + ) + def test_http_connect_bad_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh: + with pytest.raises(ProxyError): + ctx.proxy_info_request(rh) + + def test_http_connect_source_address(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}, + source_address=source_address, + verify=False) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['client_address'][0] == source_address + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_proxy(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is True + assert 'Proxy-Authorization' not in proxy_info['headers'] + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_verify_failed(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler) as server_address: + with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + # Accept SSLError as may not be feasible to tell if it is proxy or request error. + # note: if request proto also does ssl verification, this may also be the error of the request. + # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases. + with pytest.raises((ProxyError, SSLError)): + ctx.proxy_info_request(rh) + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_proxy_auth(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert 'Proxy-Authorization' in proxy_info['headers'] |