# Based on deluge/httpdownloader.py as added by commit
# 2e2851dc13d73352530dd4495c7e05603b2e520d
# ("Adding upstream version 2.1.2~dev0+20240219.").
#
# Copyright (C) 2009 Andrew Resch
#
# This file is part of Deluge and is licensed under GNU General Public License 3.0, or later, with
# the additional special exception to link portions of this program with the OpenSSL library.
# See LICENSE for more details.
#

import email.message
import logging
import os.path
import zlib

from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.web import client, http
from twisted.web._newclient import HTTPClientParser
from twisted.web.error import Error, PageRedirect
from twisted.web.http_headers import Headers
from twisted.web.iweb import IAgent
from zope.interface import implementer

from deluge.common import get_version

log = logging.getLogger(__name__)


class CompressionDecoder(client.GzipDecoder):
    """A compression decoder for gzip, x-gzip and deflate."""

    def deliverBody(self, protocol):  # NOQA: N802
        """Wrap `protocol` so the body is decompressed before delivery."""
        self.original.deliverBody(CompressionDecoderProtocol(protocol, self.original))


class CompressionDecoderProtocol(client._GzipProtocol):
    """A compression decoder protocol for CompressionDecoder."""

    def __init__(self, protocol, response):
        super().__init__(protocol, response)
        # wbits of 32 + MAX_WBITS makes zlib auto-detect a gzip or zlib header,
        # so one decompressor serves every encoding registered for this decoder.
        self._zlibDecompress = zlib.decompressobj(32 + zlib.MAX_WBITS)


class BodyHandler(HTTPClientParser):
    """An HTTP parser that saves the response to a file."""

    def __init__(self, request, finished, length, agent, encoding=None):
        """BodyHandler init.

        Args:
            request (t.w.i.IClientRequest): The parser request.
            finished (Deferred): A Deferred to handle the finished response.
            length (int): The length of the response.
            agent (t.w.i.IAgent): The agent from which the request was sent.
            encoding (str, optional): Charset of the body. When set, the body
                is re-encoded from this charset to UTF-8 before being saved.
        """
        super().__init__(request, finished)
        self.agent = agent
        self.finished = finished
        self.total_length = length
        self.current_length = 0
        # A bytearray grows in place; repeated `bytes +=` would copy the whole
        # buffer on every chunk and go quadratic on large downloads.
        self.data = bytearray()
        self.encoding = encoding

    def dataReceived(self, data):  # NOQA: N802
        """Buffer a chunk of the body and report progress to the agent."""
        self.current_length += len(data)
        self.data += data
        if self.agent.part_callback:
            self.agent.part_callback(data, self.current_length, self.total_length)

    def connectionLost(self, reason):  # NOQA: N802
        """Write the buffered body to the agent's filename and fire `finished`.

        Any error while re-encoding or writing the file is delivered through
        the errback of `finished`, so the caller never waits forever on a
        Deferred that would otherwise not fire.
        """
        try:
            payload = bytes(self.data)
            if self.encoding:
                payload = payload.decode(self.encoding).encode('utf8')
            with open(self.agent.filename, 'wb') as _file:
                _file.write(payload)
        except Exception:
            # Unknown charset, undecodable body or an unwritable destination.
            self.finished.errback(Failure())
        else:
            self.finished.callback(self.agent.filename)
        # `finished` has been fired above in both paths; the state is set
        # before delegating to the parent implementation, as in the original.
        self.state = 'DONE'
        HTTPClientParser.connectionLost(self, reason)


@implementer(IAgent)
class HTTPDownloaderAgent:
    """A File Downloader Agent."""

    def __init__(
        self,
        agent,
        filename,
        part_callback=None,
        force_filename=False,
        allow_compression=True,
        handle_redirect=True,
    ):
        """HTTPDownloaderAgent init.

        Args:
            agent (t.w.c.Agent): The agent which will send the requests.
            filename (str): The filename to save the file as.
            part_callback (func): A function to be called when a part of data
                is received, its signature should be:
                    func(data, current_length, total_length)
            force_filename (bool): Forces use of the supplied filename,
                regardless of header content.
            allow_compression (bool): Stored on the agent; the decoding itself
                is configured on the wrapped agent by the caller.
            handle_redirect (bool): When False, a redirect response is
                reported as a PageRedirect failure.
        """

        self.handle_redirect = handle_redirect
        self.agent = agent
        self.filename = filename
        self.part_callback = part_callback
        self.force_filename = force_filename
        self.allow_compression = allow_compression
        self.decoder = None

    def request_callback(self, response):
        """Turn a response into a Deferred that fires with the saved filename.

        Args:
            response (t.w.i.IResponse): The response returned by the wrapped agent.

        Returns:
            Deferred: Fires with the filename once the body has been written,
                or fails with PageRedirect / Error for redirect and >= 400
                responses.
        """
        finished = Deferred()

        if not self.handle_redirect and response.code in (
            http.MOVED_PERMANENTLY,
            http.FOUND,
            http.SEE_OTHER,
            http.TEMPORARY_REDIRECT,
        ):
            # A redirect without a Location header must still surface as a
            # PageRedirect rather than a TypeError from indexing None.
            location = response.headers.getRawHeaders(b'location', [None])[0]
            finished.errback(Failure(PageRedirect(response.code, location=location)))
        elif response.code >= 400:
            finished.errback(Failure(Error(response.code)))
        else:
            headers = response.headers
            body_length = self._body_length(headers)
            if not self.force_filename:
                self._apply_server_filename(headers)
            encoding = self._text_charset(headers)
            response.deliverBody(
                BodyHandler(response.request, finished, body_length, self, encoding)
            )

        return finished

    @staticmethod
    def _body_length(headers):
        """Return the Content-Length as an int, or 0 when absent or malformed."""
        raw_length = headers.getRawHeaders(b'content-length', default=[0])[0]
        try:
            return int(raw_length)
        except ValueError:
            # The length only feeds progress reporting, so a bad value must
            # not abort the download.
            return 0

    def _apply_server_filename(self, headers):
        """Adopt the Content-Disposition filename, if any, as the destination."""
        raw_disposition = headers.getRawHeaders(b'content-disposition')
        if not raw_disposition:
            return

        message = email.message.Message()
        message['content-disposition'] = raw_disposition[0].decode('utf-8')
        suggested = message.get_filename()
        if suggested is None:
            return

        suggested = sanitise_filename(suggested)
        if suggested in ('', '.', '..'):
            # These would resolve to a directory, not a file; keep our own name.
            log.warning('Ignoring unusable filename suggested by server: %r', suggested)
            return

        candidate = os.path.join(os.path.dirname(self.filename), suggested)
        fileroot, fileext = os.path.splitext(candidate)
        count = 1
        while os.path.isfile(candidate):
            # Increment filename if already exists
            candidate = f'{fileroot}-{count}{fileext}'
            count += 1

        self.filename = candidate

    @staticmethod
    def _text_charset(headers):
        """Return the declared charset for text/* bodies, otherwise None."""
        raw_type = headers.getRawHeaders(b'content-type')
        if not raw_type:
            # No Content-Type at all: save the body untouched.
            return None

        message = email.message.Message()
        message['content-type'] = raw_type[0].decode()
        # Only re-encode text content types.
        if not message.get_content_type().startswith('text/'):
            return None
        return message.get_content_charset()

    def request(self, method, uri, headers=None, body_producer=None):
        """Issue a new request to the wrapped agent.

        Args:
            method (bytes): The HTTP method to use.
            uri (bytes): The url to download from.
            headers (t.w.h.Headers, optional): Any extra headers to send.
            body_producer (t.w.i.IBodyProducer, optional): Request body data.

        Returns:
            Deferred: The filename of the downloaded file.
        """
        if headers is None:
            headers = Headers()

        if not headers.hasHeader(b'User-Agent'):
            user_agent = f'Deluge/{get_version()} (https://deluge-torrent.org)'
            headers.addRawHeader('User-Agent', user_agent)

        d = self.agent.request(
            method=method, uri=uri, headers=headers, bodyProducer=body_producer
        )
        d.addCallback(self.request_callback)
        return d


def sanitise_filename(filename):
    """Sanitises a filename to use as a download destination file.

    Logs any filenames that could be considered malicious.

    Args:
        filename (str): The filename to sanitise.

    Returns:
        str: The sanitised filename.
    """

    # Remove any quotes
    filename = filename.strip('\'"')

    if os.path.basename(filename) != filename:
        # Dodgy server, log it
        log.warning(
            'Potentially malicious server: trying to write to file: %s', filename
        )
        # Only use the basename
        filename = os.path.basename(filename)

    filename = filename.strip()
    if filename.startswith('.') or ';' in filename or '|' in filename:
        # Dodgy server, log it
        log.warning(
            'Potentially malicious server: trying to write to file: %s', filename
        )

    return filename


def _download_file(
    url,
    filename,
    callback=None,
    headers=None,
    force_filename=False,
    allow_compression=True,
    handle_redirects=True,
):
    """Downloads a file from a specific URL and returns a Deferred.

    A callback function can be specified to be called as parts are received.

    Args:
        url (str): The url to download from.
        filename (str): The filename to save the file as.
        callback (func): A function to be called when partial data is received,
            its signature should be: func(data, current_length, total_length)
        headers (dict): Any optional headers to send.
        force_filename (bool): Force using the filename specified rather than
            one the server may suggest.
        allow_compression (bool): Allows gzip & deflate decoding.
        handle_redirects (bool): HTTP redirects handled automatically or not.

    Returns:
        Deferred: The filename of the downloaded file.

    Raises:
        t.w.e.PageRedirect
        t.w.e.Error: for all other HTTP response errors
    """

    agent = client.Agent(reactor)

    if allow_compression:
        enc_accepted = ['gzip', 'x-gzip', 'deflate']
        decoders = [(enc.encode(), CompressionDecoder) for enc in enc_accepted]
        agent = client.ContentDecoderAgent(agent, decoders)
    if handle_redirects:
        agent = client.RedirectAgent(agent)

    agent = HTTPDownloaderAgent(
        agent, filename, callback, force_filename, allow_compression, handle_redirects
    )

    # The Headers init expects dict values to be a list. Build a new dict so
    # the caller's mapping is left untouched.
    if headers:
        headers = {
            name: value if isinstance(value, list) else [value]
            for name, value in headers.items()
        }

    return agent.request(b'GET', url.encode(), Headers(headers))


def download_file(
    url,
    filename,
    callback=None,
    headers=None,
    force_filename=False,
    allow_compression=True,
    handle_redirects=True,
):
    """Downloads a file from a specific URL and returns a Deferred.

    A callback function can be specified to be called as parts are received.

    Args:
        url (str): The url to download from.
        filename (str): The filename to save the file as.
        callback (func): A function to be called when partial data is received,
            its signature should be: func(data, current_length, total_length).
        headers (dict): Any optional headers to send.
        force_filename (bool): Force the filename specified rather than one the
            server may suggest.
        allow_compression (bool): Allows gzip & deflate decoding.
        handle_redirects (bool): HTTP redirects handled automatically or not.

    Returns:
        Deferred: The filename of the downloaded file.

    Raises:
        t.w.e.PageRedirect: If handle_redirects is False.
        t.w.e.Error: For all other HTTP response errors.
    """

    def on_download_success(result):
        log.debug('Download success!')
        return result

    def on_download_fail(failure):
        log.warning(
            'Error occurred downloading file from "%s": %s',
            url,
            failure.getErrorMessage(),
        )
        # Returning the Failure keeps the Deferred on its errback chain.
        return failure

    d = _download_file(
        url,
        filename,
        callback=callback,
        headers=headers,
        force_filename=force_filename,
        allow_compression=allow_compression,
        handle_redirects=handle_redirects,
    )
    d.addCallbacks(on_download_success, on_download_fail)
    return d