Diffstat (limited to 'caching_proxy.py')
-rwxr-xr-x | caching_proxy.py | 118 |
1 file changed, 118 insertions, 0 deletions
diff --git a/caching_proxy.py b/caching_proxy.py
new file mode 100755
index 0000000..e57a851
--- /dev/null
+++ b/caching_proxy.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import time
+import http.client
+import http.server
+from io import StringIO
+import pathlib
+import urllib.parse
+
+oldcachedir = None
+newcachedir = None
+readonly = False
+
+
+class ProxyRequestHandler(http.server.BaseHTTPRequestHandler):
+    def do_GET(self):
+        assert int(self.headers.get("Content-Length", 0)) == 0
+        assert self.headers["Host"]
+        pathprefix = "http://" + self.headers["Host"] + "/"
+        assert self.path.startswith(pathprefix)
+        sanitizedpath = urllib.parse.unquote(self.path.removeprefix(pathprefix))
+        oldpath = oldcachedir / sanitizedpath
+        newpath = newcachedir / sanitizedpath
+
+        if not readonly:
+            newpath.parent.mkdir(parents=True, exist_ok=True)
+
+        # just send back to client
+        if newpath.exists():
+            print(f"proxy cached: {self.path}", file=sys.stderr)
+            self.wfile.write(b"HTTP/1.1 200 OK\r\n")
+            self.send_header("Content-Length", newpath.stat().st_size)
+            self.end_headers()
+            with newpath.open(mode="rb") as new:
+                while True:
+                    buf = new.read(64 * 1024)  # same as shutil uses
+                    if not buf:
+                        break
+                    self.wfile.write(buf)
+            self.wfile.flush()
+            return
+
+        if readonly:
+            newpath = pathlib.Path("/dev/null")
+
+        # copy from oldpath to newpath and send back to client
+        # Only take files from the old cache if they are .deb files or Packages
+        # files in the by-hash directory as only those are unique by their path
+        # name. Other files like InRelease files have to be downloaded afresh.
+        if oldpath.exists() and (
+            oldpath.suffix == ".deb" or "by-hash" in oldpath.parts
+        ):
+            print(f"proxy cached: {self.path}", file=sys.stderr)
+            self.wfile.write(b"HTTP/1.1 200 OK\r\n")
+            self.send_header("Content-Length", oldpath.stat().st_size)
+            self.end_headers()
+            with oldpath.open(mode="rb") as old, newpath.open(mode="wb") as new:
+                # we are not using shutil.copyfileobj() because we want to
+                # write to two file objects simultaneously
+                while True:
+                    buf = old.read(64 * 1024)  # same as shutil uses
+                    if not buf:
+                        break
+                    self.wfile.write(buf)
+                    new.write(buf)
+            self.wfile.flush()
+            return
+
+        # download fresh copy
+        try:
+            print(f"\rproxy download: {self.path}", file=sys.stderr)
+            conn = http.client.HTTPConnection(self.headers["Host"], timeout=5)
+            conn.request("GET", self.path, None, dict(self.headers))
+            res = conn.getresponse()
+            assert (res.status, res.reason) == (200, "OK"), (res.status, res.reason)
+            self.wfile.write(b"HTTP/1.1 200 OK\r\n")
+            for k, v in res.getheaders():
+                # do not allow a persistent connection
+                if k == "connection":
+                    continue
+                self.send_header(k, v)
+            self.end_headers()
+            with newpath.open(mode="wb") as f:
+                # we are not using shutil.copyfileobj() because we want to
+                # write to two file objects simultaneously and throttle the
+                # writing speed to 1024 kB/s
+                while True:
+                    buf = res.read(64 * 1024)  # same as shutil uses
+                    if not buf:
+                        break
+                    self.wfile.write(buf)
+                    f.write(buf)
+                    time.sleep(64 / 1024)  # 1024 kB/s
+            self.wfile.flush()
+        except Exception as e:
+            self.send_error(502)
+
+
+def main():
+    global oldcachedir, newcachedir, readonly
+    if sys.argv[1] == "--readonly":
+        readonly = True
+        oldcachedir = pathlib.Path(sys.argv[2])
+        newcachedir = pathlib.Path(sys.argv[3])
+    else:
+        oldcachedir = pathlib.Path(sys.argv[1])
+        newcachedir = pathlib.Path(sys.argv[2])
+    print(f"starting caching proxy for {newcachedir}", file=sys.stderr)
+    httpd = http.server.ThreadingHTTPServer(
+        server_address=("", 8080), RequestHandlerClass=ProxyRequestHandler
+    )
+    httpd.serve_forever()
+
+
+if __name__ == "__main__":
+    main()
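
Usage sketch (not part of the commit above): do_GET() asserts that the request target is an absolute http:// URI matching the Host header, and the server listens on port 8080, so clients must talk to it as a forward HTTP proxy, e.g. by pointing their http proxy setting at 127.0.0.1:8080. The snippet below is a minimal illustration under that assumption; the cache directory names in the comment and the example mirror URL are made up, not taken from the diff.

#!/usr/bin/env python3
# minimal usage sketch, assuming the proxy was started with something like
#   ./caching_proxy.py ./oldcache ./newcache      (directory names are made up)
# Requests are routed through 127.0.0.1:8080 as a forward HTTP proxy because
# do_GET() only accepts absolute-URI request paths.

import urllib.request

proxy = urllib.request.ProxyHandler({"http": "http://127.0.0.1:8080"})
opener = urllib.request.build_opener(proxy)

# illustrative URL only; any plain-http mirror path is handled the same way
url = "http://deb.debian.org/debian/dists/unstable/InRelease"
with opener.open(url) as resp:
    body = resp.read()
print(f"fetched {len(body)} bytes via the caching proxy")

On the first run the file is downloaded (throttled) and stored under the new cache directory; a second run of the same request is answered from that cache without contacting the mirror.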