1 files changed, 1199 insertions, 0 deletions
diff --git a/src/rgw/rgw_asio_frontend.cc b/src/rgw/rgw_asio_frontend.cc
new file mode 100644
index 000000000..633a29633
--- /dev/null
+++ b/src/rgw/rgw_asio_frontend.cc
@@ -0,0 +1,1199 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include <atomic>
+#include <ctime>
+#include <thread>
+#include <vector>
+
+#include <boost/asio.hpp>
+#include <boost/intrusive/list.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+
+#include <boost/context/protected_fixedsize_stack.hpp>
+#include <spawn/spawn.hpp>
+
+#include "common/async/shared_mutex.h"
+#include "common/errno.h"
+#include "common/strtol.h"
+
+#include "rgw_asio_client.h"
+#include "rgw_asio_frontend.h"
+
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+#include <boost/asio/ssl.hpp>
+#endif
+
+#include "common/split.h"
+
+#include "services/svc_config_key.h"
+#include "services/svc_zone.h"
+
+#include "rgw_zone.h"
+
+#include "rgw_asio_frontend_timer.h"
+#include "rgw_dmclock_async_scheduler.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+namespace {
+
+using tcp = boost::asio::ip::tcp;
+namespace http = boost::beast::http;
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+namespace ssl = boost::asio::ssl;
+#endif
+
+struct Connection;
+
+// use explicit executor types instead of the type-erased boost::asio::executor
+using executor_type = boost::asio::io_context::executor_type;
+
+using tcp_socket = boost::asio::basic_stream_socket<tcp, executor_type>;
+using tcp_stream = boost::beast::basic_stream<tcp, executor_type>;
+
+using timeout_timer = rgw::basic_timeout_timer<ceph::coarse_mono_clock,
+      executor_type, Connection>;
+
+static constexpr size_t parse_buffer_size = 65536;
+using parse_buffer = boost::beast::flat_static_buffer<parse_buffer_size>;
+
+// Stack allocator handed to spawn::spawn() for each connection coroutine.
+// Every coroutine gets a fixed 512 KiB stack from boost::context's
+// protected_fixedsize_stack (mmap/mprotect-backed allocation).
+auto make_stack_allocator() {
+  constexpr size_t coroutine_stack_size = 512 * 1024;
+  return boost::context::protected_fixedsize_stack{coroutine_stack_size};
+}
+
+using namespace std;
+
+// ClientIO backend for a single connection. All socket i/o is issued from the
+// connection's coroutine (through 'yield'), and every asynchronous operation
+// is bracketed by timeout.start()/timeout.cancel(). The first
+// connection-level failure is remembered in fatal_ec so the caller can query
+// it through get_fatal_error_code().
+template <typename Stream>
+class StreamIO : public rgw::asio::ClientIO {
+  CephContext* const cct;
+  Stream& stream;
+  timeout_timer& timeout;
+  yield_context yield;
+  parse_buffer& buffer;
+  boost::system::error_code fatal_ec;
+ public:
+  StreamIO(CephContext *cct,
+           Stream& stream,
+           timeout_timer& timeout,
+           rgw::asio::parser_type& parser,
+           yield_context yield,
+           parse_buffer& buffer,
+           bool is_ssl,
+           const tcp::endpoint& local_endpoint,
+           const tcp::endpoint& remote_endpoint)
+      : ClientIO(parser, is_ssl, local_endpoint, remote_endpoint),
+        cct(cct),
+        stream(stream),
+        timeout(timeout),
+        yield(yield),
+        buffer(buffer)
+  {}
+
+  // first fatal (connection-level) error seen by write_data()/recv_body(),
+  // or a default-constructed error_code if there was none
+  boost::system::error_code get_fatal_error_code() const { return fatal_ec; }
+
+  // Write 'len' bytes from 'buf' and return the number of bytes written.
+  // On failure the error is logged, recorded in fatal_ec (if it is the first
+  // one) and rethrown as rgw::io::Exception.
+  size_t write_data(const char* buf, size_t len) override {
+    boost::system::error_code write_ec;
+    timeout.start();
+    const auto written = boost::asio::async_write(
+        stream, boost::asio::buffer(buf, len), yield[write_ec]);
+    timeout.cancel();
+    if (!write_ec) {
+      return written;
+    }
+
+    ldout(cct, 4) << "write_data failed: " << write_ec.message() << dendl;
+    if (write_ec == boost::asio::error::broken_pipe) {
+      // shut the socket down in both directions, ignoring any error
+      boost::system::error_code ignored;
+      stream.lowest_layer().shutdown(tcp_socket::shutdown_both, ignored);
+    }
+    if (!fatal_ec) {
+      fatal_ec = write_ec;
+    }
+    throw rgw::io::Exception(write_ec.value(), std::system_category());
+  }
+
+  // Read up to 'max' bytes of request body into 'buf' and return the number
+  // of bytes stored. Stops when the buffer is full, the message is complete,
+  // or the parser reports need_buffer. Other errors are logged, recorded in
+  // fatal_ec (if first) and rethrown as rgw::io::Exception.
+  size_t recv_body(char* buf, size_t max) override {
+    // aim the parser's body at the caller's buffer; the byte count returned
+    // below is derived from how much of 'size' is left afterwards
+    auto& body = parser.get().body();
+    body.data = buf;
+    body.size = max;
+
+    for (;;) {
+      if (body.size == 0 || parser.is_done()) {
+        break;
+      }
+      boost::system::error_code read_ec;
+      timeout.start();
+      http::async_read_some(stream, buffer, parser, yield[read_ec]);
+      timeout.cancel();
+      if (read_ec == http::error::need_buffer) {
+        break;
+      }
+      if (!read_ec) {
+        continue;
+      }
+      ldout(cct, 4) << "failed to read body: " << read_ec.message() << dendl;
+      if (!fatal_ec) {
+        fatal_ec = read_ec;
+      }
+      throw rgw::io::Exception(read_ec.value(), std::system_category());
+    }
+    return max - body.size;
+  }
+};
+
+// Stream helper: prints a packed http version number (major * 10 + minor,
+// e.g. 11) in the form 'HTTP/1.1'.
+struct http_version {
+  unsigned major_ver;
+  unsigned minor_ver;
+  explicit http_version(unsigned version)
+    : major_ver{version / 10},
+      minor_ver{version % 10}
+  {}
+};
+std::ostream& operator<<(std::ostream& out, const http_version& v) {
+  out << "HTTP/" << v.major_ver;
+  out << '.' << v.minor_ver;
+  return out;
+}
+
+// Stream helper: prints the value of one http header field, optionally
+// wrapped in 'quote' on both sides, or a bare '-' when the field is absent.
+struct log_header {
+  const http::fields& fields;
+  http::field field;
+  std::string_view quote;
+  log_header(const http::fields& fields,
+             http::field field,
+             std::string_view quote = "")
+    : fields(fields), field(field), quote(quote)
+  {}
+};
+std::ostream& operator<<(std::ostream& out, const log_header& h) {
+  const auto found = h.fields.find(h.field);
+  if (found != h.fields.end()) {
+    return out << h.quote << found->value() << h.quote;
+  }
+  // missing headers are logged as '-'
+  return out << '-';
+}
+
+// Stream helper: prints the sub-second part of a timestamp as exactly three
+// zero-padded digits of milliseconds (e.g. "007").
+struct log_ms_remainder {
+  ceph::coarse_real_time t;
+  log_ms_remainder(ceph::coarse_real_time t) : t(t) {}
+};
+std::ostream& operator<<(std::ostream& out, const log_ms_remainder& m) {
+  using namespace std::chrono;
+  const auto ms =
+      duration_cast<milliseconds>(m.t.time_since_epoch()).count() % 1000;
+  // The fill character is sticky stream state (unlike the width), so put the
+  // caller's fill character back once the padded field has been written;
+  // otherwise every later setw() on this stream would pad with '0'.
+  const auto saved_fill = out.fill('0');
+  out << std::setw(3) << ms;
+  out.fill(saved_fill);
+  return out;
+}
+
+// Stream helper: prints a timestamp in apache access-log style with added
+// milliseconds: day/month/year:hour:minute:second.millis zone
+struct log_apache_time {
+  ceph::coarse_real_time t;
+  log_apache_time(ceph::coarse_real_time t) : t(t) {}
+};
+std::ostream& operator<<(std::ostream& out, const log_apache_time& a) {
+  const auto t = ceph::coarse_real_clock::to_time_t(a.t);
+  // std::localtime() hands back a pointer to shared static storage, which is
+  // not safe when several frontend threads log at once; localtime_r() writes
+  // into a caller-owned struct instead.
+  struct tm local{};
+  if (localtime_r(&t, &local) == nullptr) {
+    // conversion failed: log a placeholder instead of passing a null tm
+    // to std::put_time()
+    return out << '-';
+  }
+  return out << std::put_time(&local, "%d/%b/%Y:%T.") << log_ms_remainder{a.t}
+      << std::put_time(&local, " %z");
+}
+
+using SharedMutex = ceph::async::SharedMutex<boost::asio::io_context::executor_type>;
+
+// Serve http requests on one established connection until the peer goes
+// away, a connection-level error occurs, or the client does not ask for
+// keep-alive.
+//
+// Runs inside a coroutine: each asynchronous step suspends on 'yield' and is
+// bracketed by timeout.start()/timeout.cancel(). 'buffer' carries unparsed
+// bytes from one request to the next. Each request is processed while
+// holding a shared lock on 'pause_mutex'. On return 'ec' holds the error code
+// of the last operation so the caller can inspect it.
+template <typename Stream>
+void handle_connection(boost::asio::io_context& context,
+                       RGWProcessEnv& env, Stream& stream,
+                       timeout_timer& timeout, size_t header_limit,
+                       parse_buffer& buffer, bool is_ssl,
+                       SharedMutex& pause_mutex,
+                       rgw::dmclock::Scheduler *scheduler,
+                       const std::string& uri_prefix,
+                       boost::system::error_code& ec,
+                       yield_context yield)
+{
+  // don't impose a limit on the body, since we read it in pieces
+  static constexpr size_t body_limit = std::numeric_limits<size_t>::max();
+
+  auto cct = env.driver->ctx();
+
+  // read messages from the stream until eof
+  for (;;) {
+    // configure the parser
+    rgw::asio::parser_type parser;
+    parser.header_limit(header_limit);
+    parser.body_limit(body_limit);
+    timeout.start();
+    // parse the header
+    http::async_read_header(stream, buffer, parser, yield[ec]);
+    timeout.cancel();
+    // these errors just mean the connection is gone; log quietly and stop
+    if (ec == boost::asio::error::connection_reset ||
+        ec == boost::asio::error::bad_descriptor ||
+        ec == boost::asio::error::operation_aborted ||
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+        ec == ssl::error::stream_truncated ||
+#endif
+        ec == http::error::end_of_stream) {
+      ldout(cct, 20) << "failed to read header: " << ec.message() << dendl;
+      return;
+    }
+    auto& message = parser.get();
+    if (ec) {
+      // any other header error is answered with a 400 before closing
+      ldout(cct, 1) << "failed to read header: " << ec.message() << dendl;
+      http::response<http::empty_body> response;
+      response.result(http::status::bad_request);
+      response.version(message.version() == 10 ? 10 : 11);
+      response.prepare_payload();
+      timeout.start();
+      http::async_write(stream, response, yield[ec]);
+      timeout.cancel();
+      if (ec) {
+        ldout(cct, 5) << "failed to write response: " << ec.message() << dendl;
+      }
+      ldout(cct, 1) << "====== req done http_status=400 ======" << dendl;
+      return;
+    }
+
+    bool expect_continue = (message[http::field::expect] == "100-continue");
+
+    {
+      // the shared lock is held for the whole scope of request processing
+      auto lock = pause_mutex.async_lock_shared(yield[ec]);
+      if (ec == boost::asio::error::operation_aborted) {
+        return;
+      } else if (ec) {
+        ldout(cct, 1) << "failed to lock: " << ec.message() << dendl;
+        return;
+      }
+
+      // process the request
+      RGWRequest req{env.driver->get_new_req_id()};
+
+      auto& socket = stream.lowest_layer();
+      const auto& remote_endpoint = socket.remote_endpoint(ec);
+      if (ec) {
+        ldout(cct, 1) << "failed to connect client: " << ec.message() << dendl;
+        return;
+      }
+      const auto& local_endpoint = socket.local_endpoint(ec);
+      if (ec) {
+        ldout(cct, 1) << "failed to connect client: " << ec.message() << dendl;
+        return;
+      }
+
+      StreamIO real_client{cct, stream, timeout, parser, yield, buffer,
+                           is_ssl, local_endpoint, remote_endpoint};
+
+      // stack the io filters on top of the raw stream client
+      auto real_client_io = rgw::io::add_reordering(
+                              rgw::io::add_buffering(cct,
+                                rgw::io::add_chunking(
+                                  rgw::io::add_conlen_controlling(
+                                    &real_client))));
+      RGWRestfulIO client(cct, &real_client_io);
+      // only hand the coroutine's yield context down to request processing
+      // when rgw_beast_enable_async is set
+      optional_yield y = null_yield;
+      if (cct->_conf->rgw_beast_enable_async) {
+        y = optional_yield{context, yield};
+      }
+      int http_ret = 0;
+      string user = "-";
+      const auto started = ceph::coarse_real_clock::now();
+      ceph::coarse_real_clock::duration latency{};
+      process_request(env, &req, uri_prefix, &client, y,
+                      scheduler, &user, &latency, &http_ret);
+
+      if (cct->_conf->subsys.should_gather(ceph_subsys_rgw_access, 1)) {
+        // access log line elements begin per Apache Combined Log Format with additions following
+        lsubdout(cct, rgw_access, 1) << "beast: " << std::hex << &req << std::dec << ": "
+            << remote_endpoint.address() << " - " << user << " [" << log_apache_time{started} << "] \""
+            << message.method_string() << ' ' << message.target() << ' '
+            << http_version{message.version()} << "\" " << http_ret << ' '
+            << client.get_bytes_sent() + client.get_bytes_received() << ' '
+            << log_header{message, http::field::referer, "\""} << ' '
+            << log_header{message, http::field::user_agent, "\""} << ' '
+            << log_header{message, http::field::range} << " latency="
+            << latency << dendl;
+      }
+
+      // process_request() can't distinguish between connection errors and
+      // http/s3 errors, so check StreamIO for fatal connection errors
+      ec = real_client.get_fatal_error_code();
+      if (ec) {
+        return;
+      }
+
+      if (real_client.sent_100_continue()) {
+        expect_continue = false;
+      }
+    }
+
+    if (!parser.keep_alive()) {
+      return;
+    }
+
+    // Scratch space for body bytes that are thrown away. This is a plain
+    // local on the coroutine's own stack rather than a function-local static:
+    // a static would be written by every connection that is discarding data
+    // at the same time, which is a data race across the frontend threads.
+    std::array<char, 1024> discard_buffer;
+
+    // if we failed before reading the entire message, discard any remaining
+    // bytes before reading the next
+    while (!expect_continue && !parser.is_done()) {
+      auto& body = parser.get().body();
+      body.size = discard_buffer.size();
+      body.data = discard_buffer.data();
+
+      timeout.start();
+      http::async_read_some(stream, buffer, parser, yield[ec]);
+      timeout.cancel();
+      if (ec == http::error::need_buffer) {
+        // scratch buffer filled up; reset it and keep draining
+        continue;
+      }
+      if (ec == boost::asio::error::connection_reset) {
+        return;
+      }
+      if (ec) {
+        ldout(cct, 5) << "failed to discard unread message: "
+            << ec.message() << dendl;
+        return;
+      }
+    }
+  }
+}
+
+// One accepted client connection: the tcp socket plus the parse buffer that
+// is reused across requests on it. Connections are reference-counted because
+// the timeout handler can outlive the coroutine, and they carry an intrusive
+// list hook so they can be linked into a ConnectionList.
+struct Connection : boost::intrusive::list_base_hook<>,
+                    boost::intrusive_ref_counter<Connection>
+{
+  tcp_socket socket;
+  parse_buffer buffer;
+
+  // takes ownership of an already-accepted socket
+  explicit Connection(tcp_socket&& accepted) noexcept
+      : socket{std::move(accepted)}
+  {}
+
+  tcp_socket& get_socket() { return socket; }
+
+  // close the socket, reporting any failure through 'ec'
+  void close(boost::system::error_code& ec) {
+    socket.close(ec);
+  }
+};
+
+// Mutex-protected intrusive list of live connections, so that all of their
+// sockets can be closed with a single call to close().
+class ConnectionList {
+  using List = boost::intrusive::list<Connection>;
+  List connections;
+  std::mutex mutex;
+
+  // unlink 'c' if it is still on the list; a no-op after close() cleared it
+  void remove(Connection& c) {
+    std::lock_guard guard{mutex};
+    if (!c.is_linked()) {
+      return;
+    }
+    connections.erase(List::s_iterator_to(c));
+  }
+ public:
+  // scope guard returned by add(): removes the connection from the list again
+  // when it is destroyed
+  class Guard {
+    ConnectionList *list;
+    Connection *conn;
+   public:
+    Guard(ConnectionList *list, Connection *conn)
+      : list(list), conn(conn)
+    {}
+    ~Guard() { list->remove(*conn); }
+  };
+
+  // link 'conn' into the list for as long as the returned Guard lives
+  [[nodiscard]] Guard add(Connection& conn) {
+    std::lock_guard guard{mutex};
+    connections.push_back(conn);
+    return Guard{this, &conn};
+  }
+
+  // close the socket of every tracked connection, then empty the list;
+  // 'ec' receives the result of the last socket close
+  void close(boost::system::error_code& ec) {
+    std::lock_guard guard{mutex};
+    for (auto it = connections.begin(); it != connections.end(); ++it) {
+      it->socket.close(ec);
+    }
+    connections.clear();
+  }
+};
+
+namespace dmc = rgw::dmclock;
+// Implementation of the asio/beast http frontend: owns the io_context, the
+// listening sockets, the list of live connections, and the request scheduler.
+// init() parses the frontend configuration and starts listening; the other
+// public methods (run/stop/join/pause/unpause) are defined outside this view.
+//
+// NOTE: member declaration order matters here - 'context' is declared before
+// the members constructed from it (pause_mutex, listeners, ...).
+class AsioFrontend {
+  RGWProcessEnv& env;
+  RGWFrontendConfig* conf;
+  boost::asio::io_context context;
+  std::string uri_prefix;  // set from the "prefix" config key in init()
+  // per-operation timeout; overridden by "request_timeout_ms" in init()
+  ceph::timespan request_timeout = std::chrono::milliseconds(REQUEST_TIMEOUT);
+  // maximum http header size; overridden by "max_header_size" in init()
+  size_t header_limit = 16384;
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+  // only engaged when an ssl_certificate is configured (see init_ssl())
+  boost::optional<ssl::context> ssl_context;
+  int get_config_key_val(string name,
+                         const string& type,
+                         bufferlist *pbl);
+  int ssl_set_private_key(const string& name, bool is_ssl_cert);
+  int ssl_set_certificate_chain(const string& name);
+  int init_ssl();
+#endif
+  // held shared by each request in handle_connection()
+  SharedMutex pause_mutex;
+  std::unique_ptr<rgw::dmclock::Scheduler> scheduler;
+
+  // one listening socket, plus the socket that the pending async_accept()
+  // will fill in
+  struct Listener {
+    tcp::endpoint endpoint;
+    tcp::acceptor acceptor;
+    tcp_socket socket;
+    bool use_ssl = false;
+    bool use_nodelay = false;
+
+    explicit Listener(boost::asio::io_context& context)
+      : acceptor(context), socket(context) {}
+  };
+  // accept handlers capture references to these elements, so the vector must
+  // not grow once init() has started the listeners
+  std::vector<Listener> listeners;
+
+  ConnectionList connections;
+
+  // work guard to keep run() threads busy while listeners are paused
+  using Executor = boost::asio::io_context::executor_type;
+  std::optional<boost::asio::executor_work_guard<Executor>> work;
+
+  std::vector<std::thread> threads;
+  std::atomic<bool> going_down{false};
+
+  CephContext* ctx() const { return env.driver->ctx(); }
+  // NOTE(review): client_counters and client_config are not referenced in the
+  // part of the file visible here - confirm whether they are still needed
+  std::optional<dmc::ClientCounters> client_counters;
+  std::unique_ptr<dmc::ClientConfig> client_config;
+  // completion handler for async_accept() on 'listener'
+  void accept(Listener& listener, boost::system::error_code ec);
+
+ public:
+  // Selects the request scheduler from dmc::get_scheduler_t(): an
+  // AsyncScheduler for dmclock, otherwise a SimpleThrottler (the 'none' type
+  // logs an error and falls through to the throttler).
+  AsioFrontend(RGWProcessEnv& env, RGWFrontendConfig* conf,
+	       dmc::SchedulerCtx& sched_ctx)
+    : env(env), conf(conf), pause_mutex(context.get_executor())
+  {
+    auto sched_t = dmc::get_scheduler_t(ctx());
+    switch(sched_t){
+    case dmc::scheduler_t::dmclock:
+      // NOTE(review): get_dmc_client_config() is passed twice, once as
+      // returned and once dereferenced - confirm against the AsyncScheduler
+      // constructor signature
+      scheduler.reset(new dmc::AsyncScheduler(ctx(),
+                                              context,
+                                              std::ref(sched_ctx.get_dmc_client_counters()),
+                                              sched_ctx.get_dmc_client_config(),
+                                              *sched_ctx.get_dmc_client_config(),
+                                              dmc::AtLimit::Reject));
+      break;
+    case dmc::scheduler_t::none:
+      lderr(ctx()) << "Got invalid scheduler type for beast, defaulting to throttler" << dendl;
+      [[fallthrough]];
+    case dmc::scheduler_t::throttler:
+      scheduler.reset(new dmc::SimpleThrottler(ctx()));
+
+    }
+  }
+
+  int init();
+  int run();
+  void stop();
+  void join();
+  void pause();
+  void unpause();
+};
+
+// Parse a decimal tcp port number from the NUL-terminated string 'input'.
+//
+// On success 'ec' is cleared and the port is returned. On failure 0 is
+// returned and 'ec' is set to:
+//   EINVAL - no digits at all, or extra characters after the number
+//            (e.g. "80abc", which strtoul() alone would accept as 80)
+//   ERANGE - the value does not fit in an unsigned short
+unsigned short parse_port(const char *input, boost::system::error_code& ec)
+{
+  ec.clear();
+
+  char *end = nullptr;
+  const auto port = std::strtoul(input, &end, 10);
+  if (end == input || *end != '\0') {
+    // nothing was consumed, or the number is followed by garbage
+    ec.assign(EINVAL, boost::system::system_category());
+    return 0;
+  }
+  if (port > std::numeric_limits<unsigned short>::max()) {
+    ec.assign(ERANGE, boost::system::system_category());
+    return 0;
+  }
+  // range-checked above, so the narrowing conversion is lossless
+  return static_cast<unsigned short>(port);
+}
+	
+// Parse a listen address of the form "a.b.c.d", "a.b.c.d:port", "[ipv6]" or
+// "[ipv6]:port" into a tcp::endpoint. 'default_port' is used when no port is
+// given.
+//
+// On failure 'ec' is set and the returned endpoint must not be used. 'ec' is
+// cleared on entry, so a stale error from an earlier call is never reported.
+tcp::endpoint parse_endpoint(boost::asio::string_view input,
+                             unsigned short default_port,
+                             boost::system::error_code& ec)
+{
+  tcp::endpoint endpoint;
+  ec.clear();
+
+  if (input.empty()) {
+    ec = boost::asio::error::invalid_argument;
+    return endpoint;
+  }
+
+  if (input[0] == '[') { // ipv6
+    const size_t addr_begin = 1;
+    const size_t addr_end = input.find(']');
+    if (addr_end == input.npos) { // no matching ]
+      ec = boost::asio::error::invalid_argument;
+      return endpoint;
+    }
+    if (addr_end + 1 < input.size()) {
+      // :port must follow [ipv6]
+      if (input[addr_end + 1] != ':') {
+        ec = boost::asio::error::invalid_argument;
+        return endpoint;
+      }
+      // copy into a std::string: a string_view is not guaranteed to be
+      // NUL-terminated, but parse_port() needs a C string
+      const auto port_view = input.substr(addr_end + 2);
+      const std::string port_str{port_view.data(), port_view.size()};
+      endpoint.port(parse_port(port_str.c_str(), ec));
+      if (ec) {
+        // bail out here: make_address_v6() below would overwrite 'ec' and
+        // hide the bad port
+        return endpoint;
+      }
+    } else {
+      endpoint.port(default_port);
+    }
+    auto addr = input.substr(addr_begin, addr_end - addr_begin);
+    endpoint.address(boost::asio::ip::make_address_v6(addr, ec));
+  } else { // ipv4
+    auto colon = input.find(':');
+    if (colon != input.npos) {
+      const auto port_view = input.substr(colon + 1);
+      const std::string port_str{port_view.data(), port_view.size()};
+      endpoint.port(parse_port(port_str.c_str(), ec));
+      if (ec) {
+        return endpoint;
+      }
+    } else {
+      endpoint.port(default_port);
+    }
+    // substr(0, npos) yields the whole input when there is no ':port' part
+    auto addr = input.substr(0, colon);
+    endpoint.address(boost::asio::ip::make_address_v4(addr, ec));
+  }
+  return endpoint;
+}
+
+// Switch the process to the gid/uid recorded in the CephContext. A value of
+// 0 means "leave unchanged". The group is changed before the user. Returns 0
+// on success, or -errno from the setgid()/setuid() call that failed.
+static int drop_privileges(CephContext *ctx)
+{
+  const uid_t target_uid = ctx->get_set_uid();
+  const gid_t target_gid = ctx->get_set_gid();
+  const std::string uid_name = ctx->get_set_uid_string();
+  const std::string gid_name = ctx->get_set_gid_string();
+
+  if (target_gid != 0) {
+    if (setgid(target_gid) != 0) {
+      const int saved_errno = errno;
+      ldout(ctx, -1) << "unable to setgid " << target_gid << ": " << cpp_strerror(saved_errno) << dendl;
+      return -saved_errno;
+    }
+  }
+
+  if (target_uid != 0) {
+    if (setuid(target_uid) != 0) {
+      const int saved_errno = errno;
+      ldout(ctx, -1) << "unable to setuid " << target_uid << ": " << cpp_strerror(saved_errno) << dendl;
+      return -saved_errno;
+    }
+  }
+
+  // only report the switch when both ids were actually changed
+  if (target_uid != 0 && target_gid != 0) {
+    ldout(ctx, 0) << "set uid:gid to " << target_uid << ":" << target_gid
+                  << " (" << uid_name << ":" << gid_name << ")" << dendl;
+  }
+  return 0;
+}
+
+// Apply the frontend configuration and start listening.
+//
+// Reads "prefix", "request_timeout_ms" and "max_header_size", sets up ssl
+// (when built with it), creates a Listener for every "port"/"endpoint" entry,
+// then opens, binds and listens on each one and queues its first
+// async_accept(). Privileges are dropped last, after the sockets are bound.
+//
+// Returns 0 on success or a negative error code. Socket setup failures are
+// reported through the return value, never by throwing.
+int AsioFrontend::init()
+{
+  boost::system::error_code ec;
+  auto& config = conf->get_config_map();
+
+  if (auto i = config.find("prefix"); i != config.end()) {
+    uri_prefix = i->second;
+  }
+
+  // Setting global timeout
+  auto timeout = config.find("request_timeout_ms");
+  if (timeout != config.end()) {
+    auto timeout_number = ceph::parse<uint64_t>(timeout->second);
+    if (timeout_number) {
+      request_timeout =  std::chrono::milliseconds(*timeout_number);
+    } else {
+      lderr(ctx()) << "WARNING: invalid value for request_timeout_ms: "
+      << timeout->second << " setting it to the default value: "
+      << REQUEST_TIMEOUT << dendl;
+    }
+  }
+
+  auto max_header_size = config.find("max_header_size");
+  if (max_header_size != config.end()) {
+    auto limit = ceph::parse<uint64_t>(max_header_size->second);
+    if (!limit) {
+      lderr(ctx()) << "WARNING: invalid value for max_header_size: "
+          << max_header_size->second << ", using the default value: "
+          << header_limit << dendl;
+    } else if (*limit > parse_buffer_size) { // can't exceed parse buffer size
+      header_limit = parse_buffer_size;
+      lderr(ctx()) << "WARNING: max_header_size " << max_header_size->second
+          << " capped at maximum value " << header_limit << dendl;
+    } else {
+      header_limit = *limit;
+    }
+  }
+
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+  int r = init_ssl();
+  if (r < 0) {
+    return r;
+  }
+#endif
+
+  // parse endpoints
+  auto ports = config.equal_range("port");
+  for (auto i = ports.first; i != ports.second; ++i) {
+    auto port = parse_port(i->second.c_str(), ec);
+    if (ec) {
+      lderr(ctx()) << "failed to parse port=" << i->second << dendl;
+      return -ec.value();
+    }
+    // a bare port gets two listeners: the default-constructed (ipv4)
+    // endpoint and an explicit ipv6 one
+    listeners.emplace_back(context);
+    listeners.back().endpoint.port(port);
+
+    listeners.emplace_back(context);
+    listeners.back().endpoint = tcp::endpoint(tcp::v6(), port);
+  }
+
+  auto endpoints = config.equal_range("endpoint");
+  for (auto i = endpoints.first; i != endpoints.second; ++i) {
+    auto endpoint = parse_endpoint(i->second, 80, ec);
+    if (ec) {
+      lderr(ctx()) << "failed to parse endpoint=" << i->second << dendl;
+      return -ec.value();
+    }
+    listeners.emplace_back(context);
+    listeners.back().endpoint = endpoint;
+  }
+  // parse tcp nodelay
+  auto nodelay = config.find("tcp_nodelay");
+  if (nodelay != config.end()) {
+    for (auto& l : listeners) {
+      l.use_nodelay = (nodelay->second == "1");
+    }
+  }
+
+  bool socket_bound = false;
+  // start listeners
+  for (auto& l : listeners) {
+    l.acceptor.open(l.endpoint.protocol(), ec);
+    if (ec) {
+      if (ec == boost::asio::error::address_family_not_supported) {
+        // e.g. no ipv6 on this host: skip this listener, keep the others
+        ldout(ctx(), 0) << "WARNING: cannot open socket for endpoint=" << l.endpoint
+                        << ", " << ec.message() << dendl;
+        continue;
+      }
+
+      lderr(ctx()) << "failed to open socket: " << ec.message() << dendl;
+      return -ec.value();
+    }
+
+    if (l.endpoint.protocol() == tcp::v6()) {
+      // the ipv4 side has its own listener, so keep this socket v6-only
+      l.acceptor.set_option(boost::asio::ip::v6_only(true), ec);
+      if (ec) {
+        lderr(ctx()) << "failed to set v6_only socket option: "
+                     << ec.message() << dendl;
+        return -ec.value();
+      }
+    }
+
+    // use the error_code overload so a failure is returned, not thrown
+    l.acceptor.set_option(tcp::acceptor::reuse_address(true), ec);
+    if (ec) {
+      lderr(ctx()) << "failed to set reuse_address socket option: "
+                   << ec.message() << dendl;
+      return -ec.value();
+    }
+    l.acceptor.bind(l.endpoint, ec);
+    if (ec) {
+      lderr(ctx()) << "failed to bind address " << l.endpoint
+          << ": " << ec.message() << dendl;
+      return -ec.value();
+    }
+
+    auto it = config.find("max_connection_backlog");
+    auto max_connection_backlog = boost::asio::socket_base::max_listen_connections;
+    if (it != config.end()) {
+      string err;
+      max_connection_backlog = strict_strtol(it->second.c_str(), 10, &err);
+      if (!err.empty()) {
+        ldout(ctx(), 0) << "WARNING: invalid value for max_connection_backlog=" << it->second << dendl;
+        max_connection_backlog = boost::asio::socket_base::max_listen_connections;
+      }
+    }
+    // error_code overload again: listen() can fail (e.g. address in use)
+    l.acceptor.listen(max_connection_backlog, ec);
+    if (ec) {
+      lderr(ctx()) << "failed to listen on " << l.endpoint
+          << ": " << ec.message() << dendl;
+      return -ec.value();
+    }
+    // 'l' is captured by reference: listeners must not be resized after this
+    l.acceptor.async_accept(l.socket,
+                            [this, &l] (boost::system::error_code ec) {
+                              accept(l, ec);
+                            });
+
+    ldout(ctx(), 4) << "frontend listening on " << l.endpoint << dendl;
+    socket_bound = true;
+  }
+  if (!socket_bound) {
+    lderr(ctx()) << "Unable to listen at any endpoints" << dendl;
+    return -EINVAL;
+  }
+
+  return drop_privileges(ctx());
+}
+
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+
+static string config_val_prefix = "config://";
+
+namespace {
+
+// Expands $name / ${name} placeholders in configuration strings. The values
+// for realm, realm_id, zonegroup, zonegroup_id, zone and zone_id are captured
+// from the zone once, at construction time.
+class ExpandMetaVar {
+  map<string, string> meta_map;
+
+public:
+  ExpandMetaVar(rgw::sal::Zone* zone_svc) {
+    meta_map.emplace("realm", zone_svc->get_realm_name());
+    meta_map.emplace("realm_id", zone_svc->get_realm_id());
+    meta_map.emplace("zonegroup", zone_svc->get_zonegroup().get_name());
+    meta_map.emplace("zonegroup_id", zone_svc->get_zonegroup().get_id());
+    meta_map.emplace("zone", zone_svc->get_name());
+    meta_map.emplace("zone_id", zone_svc->get_id());
+  }
+
+  // returns 'in' with every known placeholder replaced by its value
+  string process_str(const string& in);
+};
+
+// Return a copy of 'in' with each $name or ${name} placeholder replaced by
+// the matching meta_map value. Names consist of lowercase letters and '_'
+// only. Placeholders with an unknown name, and malformed ${... sequences,
+// are copied through unchanged.
+string ExpandMetaVar::process_str(const string& in)
+{
+  constexpr auto npos = std::string::npos;
+  const char *name_chars = "abcdefghijklmnopqrstuvwxyz_";
+
+  auto dollar = in.find('$');
+  if (meta_map.empty() || dollar == npos) {
+    // nothing to expand
+    return in;
+  }
+
+  string result;
+  // everything before this offset has already been emitted into 'result';
+  // npos means the input has been consumed to the end
+  std::string::size_type copied = 0;
+
+  for (; dollar != npos; dollar = in.find('$', copied)) {
+    // literal text between the previous placeholder and this '$'
+    if (dollar > copied) {
+      result.append(in, copied, dollar - copied);
+    }
+
+    string name;
+    size_t stop = 0;  // offset just past the placeholder text
+    if (in[dollar + 1] == '{') {
+      // ...${foo_bar}...
+      stop = in.find_first_not_of(name_chars, dollar + 2);
+      if (stop != npos && in[stop] == '}') {
+        name = in.substr(dollar + 2, stop - dollar - 2);
+        ++stop;  // swallow the closing brace as well
+      }
+    } else {
+      // ...$foo...
+      stop = in.find_first_not_of(name_chars, dollar + 1);
+      name = (stop != npos) ? in.substr(dollar + 1, stop - dollar - 1)
+                            : in.substr(dollar + 1);
+    }
+
+    const auto known = meta_map.find(name);
+    if (known != meta_map.end()) {
+      result += known->second;
+    } else {
+      // not a known variable: keep the original text, '$' included
+      result.append(in, dollar, stop - dollar);
+    }
+    copied = stop;
+  }
+
+  // trailing literal text after the last placeholder
+  if (copied != npos) {
+    result.append(in, copied, npos);
+  }
+  return result;
+}
+
+} /* anonymous namespace */
+
+// Fetch the value stored under config key 'name' into '*pbl'. 'type' is only
+// used to label log messages. Returns 0 on success, -EINVAL for an empty
+// name, or the negative error returned by the driver.
+int AsioFrontend::get_config_key_val(string name,
+                                     const string& type,
+                                     bufferlist *pbl)
+{
+  if (name.empty()) {
+    lderr(ctx()) << "bad " << type << " config value" << dendl;
+    return -EINVAL;
+  }
+
+  const int ret = env.driver->get_config_key_val(name, pbl);
+  if (ret >= 0) {
+    return 0;
+  }
+  lderr(ctx()) << type << " was not found: " << name << dendl;
+  return ret;
+}
+
+// Load the PEM private key named by 'name' into ssl_context. A name starting
+// with "config://" is looked up as a config key; anything else is treated as
+// a file path. 'is_ssl_certificate' only selects the wording of the error
+// message, for when the certificate is being tried as the key. Returns 0 on
+// success or a negative error code.
+int AsioFrontend::ssl_set_private_key(const string& name, bool is_ssl_certificate)
+{
+  boost::system::error_code ec;
+
+  const bool from_config_key =
+      boost::algorithm::starts_with(name, config_val_prefix);
+  if (from_config_key) {
+    bufferlist key_data;
+    const int ret = get_config_key_val(name.substr(config_val_prefix.size()),
+                                       "ssl_private_key",
+                                       &key_data);
+    if (ret < 0) {
+      return ret;
+    }
+    ssl_context->use_private_key(
+        boost::asio::buffer(key_data.c_str(), key_data.length()),
+        ssl::context::pem, ec);
+  } else {
+    ssl_context->use_private_key_file(name, ssl::context::pem, ec);
+  }
+
+  if (!ec) {
+    return 0;
+  }
+
+  if (is_ssl_certificate) {
+    lderr(ctx()) << "failed to use ssl_certificate=" << name
+      << " as a private key: " << ec.message() << dendl;
+  } else {
+    lderr(ctx()) << "failed to add ssl_private_key=" << name
+      << ": " << ec.message() << dendl;
+  }
+  return -ec.value();
+}
+
+// Load the certificate chain named by 'name' into ssl_context. A name
+// starting with "config://" is looked up as a config key; anything else is
+// treated as a file path. Returns 0 on success or a negative error code.
+int AsioFrontend::ssl_set_certificate_chain(const string& name)
+{
+  boost::system::error_code ec;
+
+  const bool from_config_key =
+      boost::algorithm::starts_with(name, config_val_prefix);
+  if (from_config_key) {
+    bufferlist chain_data;
+    const int ret = get_config_key_val(name.substr(config_val_prefix.size()),
+                                       "ssl_certificate",
+                                       &chain_data);
+    if (ret < 0) {
+      return ret;
+    }
+    ssl_context->use_certificate_chain(
+        boost::asio::buffer(chain_data.c_str(), chain_data.length()), ec);
+  } else {
+    ssl_context->use_certificate_chain_file(name, ec);
+  }
+
+  if (!ec) {
+    return 0;
+  }
+
+  lderr(ctx()) << "failed to use ssl_certificate=" << name
+    << ": " << ec.message() << dendl;
+  return -ec.value();
+}
+
+// Configure ssl from the frontend config and create the ssl listeners.
+//
+// Reads ssl_certificate, ssl_private_key, ssl_options and ssl_ciphers; the
+// latter three are rejected with -EINVAL unless ssl_certificate is set. If
+// neither ssl_port nor ssl_endpoint is configured, the function returns
+// before any key or certificate is loaded. Otherwise it loads the key and
+// the certificate chain and appends one or more Listener entries (use_ssl
+// set) per ssl_port / ssl_endpoint.
+//
+// Returns 0 on success or a negative error code.
+int AsioFrontend::init_ssl()
+{
+  boost::system::error_code ec;
+  auto& config = conf->get_config_map();
+
+  // ssl configuration
+  std::optional<string> cert = conf->get_val("ssl_certificate");
+  if (cert) {
+    // only initialize the ssl context if it's going to be used
+    ssl_context = boost::in_place(ssl::context::tls);
+  }
+
+  std::optional<string> key = conf->get_val("ssl_private_key");
+  // becomes true only once both the key and the certificate chain are loaded
+  bool have_cert = false;
+
+  if (key && !cert) {
+    lderr(ctx()) << "no ssl_certificate configured for ssl_private_key" << dendl;
+    return -EINVAL;
+  }
+
+  std::optional<string> options = conf->get_val("ssl_options");
+  if (options) {
+    if (!cert) {
+      lderr(ctx()) << "no ssl_certificate configured for ssl_options" << dendl;
+      return -EINVAL;
+    }
+  } else if (cert) {
+    // default when ssl_options is not given: disable everything below tls 1.2
+    options = "no_sslv2:no_sslv3:no_tlsv1:no_tlsv1_1";
+  }
+
+  // 'options' is only engaged here when 'cert' is too, so ssl_context exists
+  if (options) {
+    for (auto &option : ceph::split(*options, ":")) {
+      if (option == "default_workarounds") {
+        ssl_context->set_options(ssl::context::default_workarounds);
+      } else if (option == "no_compression") {
+        ssl_context->set_options(ssl::context::no_compression);
+      } else if (option == "no_sslv2") {
+        ssl_context->set_options(ssl::context::no_sslv2);
+      } else if (option == "no_sslv3") {
+        ssl_context->set_options(ssl::context::no_sslv3);
+      } else if (option == "no_tlsv1") {
+        ssl_context->set_options(ssl::context::no_tlsv1);
+      } else if (option == "no_tlsv1_1") {
+        ssl_context->set_options(ssl::context::no_tlsv1_1);
+      } else if (option == "no_tlsv1_2") {
+        ssl_context->set_options(ssl::context::no_tlsv1_2);
+      } else if (option == "single_dh_use") {
+        ssl_context->set_options(ssl::context::single_dh_use);
+      } else {
+        // unknown options are logged and skipped rather than treated as fatal
+        lderr(ctx()) << "ignoring unknown ssl option '" << option << "'" << dendl;
+      }
+    }
+  }
+
+  std::optional<string> ciphers = conf->get_val("ssl_ciphers");
+  if (ciphers) {
+    if (!cert) {
+      lderr(ctx()) << "no ssl_certificate configured for ssl_ciphers" << dendl;
+      return -EINVAL;
+    }
+
+    // a return value of 0 is treated as "no usable cipher in the list"
+    int r = SSL_CTX_set_cipher_list(ssl_context->native_handle(),
+                                    ciphers->c_str());
+    if (r == 0) {
+      lderr(ctx()) << "no cipher could be selected from ssl_ciphers: "
+                   << *ciphers << dendl;
+      return -EINVAL;
+    }
+  }
+
+  auto ports = config.equal_range("ssl_port");
+  auto endpoints = config.equal_range("ssl_endpoint");
+
+  /*
+   * don't try to config certificate if frontend isn't configured for ssl
+   */
+  if (ports.first == ports.second &&
+      endpoints.first == endpoints.second) {
+    return 0;
+  }
+
+  // true when no ssl_private_key was given and the certificate is used as key
+  bool key_is_cert = false;
+
+  if (cert) {
+    if (!key) {
+      key = cert;
+      key_is_cert = true;
+    }
+
+    // substitute $realm/$zonegroup/$zone style placeholders in both names
+    ExpandMetaVar emv(env.driver->get_zone());
+
+    cert = emv.process_str(*cert);
+    key = emv.process_str(*key);
+
+    int r = ssl_set_private_key(*key, key_is_cert);
+    bool have_private_key = (r >= 0);
+    if (r < 0) {
+      // the separate key failed to load: fall back to trying the certificate
+      // itself as the private key
+      if (!key_is_cert) {
+        r = ssl_set_private_key(*cert, true);
+        have_private_key = (r >= 0);
+      }
+    }
+
+    if (have_private_key) {
+      // NOTE(review): a failure here is not returned directly; it only leaves
+      // have_cert false, so it surfaces below as "no ssl_certificate
+      // configured for ssl_port/ssl_endpoint"
+      int r = ssl_set_certificate_chain(*cert);
+      have_cert = (r >= 0);
+    }
+  }
+
+  // parse ssl endpoints
+  for (auto i = ports.first; i != ports.second; ++i) {
+    if (!have_cert) {
+      lderr(ctx()) << "no ssl_certificate configured for ssl_port" << dendl;
+      return -EINVAL;
+    }
+    auto port = parse_port(i->second.c_str(), ec);
+    if (ec) {
+      lderr(ctx()) << "failed to parse ssl_port=" << i->second << dendl;
+      return -ec.value();
+    }
+    // as for plain ports: one default (ipv4) listener and one ipv6 listener
+    listeners.emplace_back(context);
+    listeners.back().endpoint.port(port);
+    listeners.back().use_ssl = true;
+
+    listeners.emplace_back(context);
+    listeners.back().endpoint = tcp::endpoint(tcp::v6(), port);
+    listeners.back().use_ssl = true;
+  }
+
+  for (auto i = endpoints.first; i != endpoints.second; ++i) {
+    if (!have_cert) {
+      lderr(ctx()) << "no ssl_certificate configured for ssl_endpoint" << dendl;
+      return -EINVAL;
+    }
+    // 443 is the default port when the endpoint does not name one
+    auto endpoint = parse_endpoint(i->second, 443, ec);
+    if (ec) {
+      lderr(ctx()) << "failed to parse ssl_endpoint=" << i->second << dendl;
+      return -ec.value();
+    }
+    listeners.emplace_back(context);
+    listeners.back().endpoint = endpoint;
+    listeners.back().use_ssl = true;
+  }
+  return 0;
+}
+#endif // WITH_RADOSGW_BEAST_OPENSSL
+
+void AsioFrontend::accept(Listener& l, boost::system::error_code ec) // completion handler for async_accept() on listener l; ec is the accept result
+{
+  if (!l.acceptor.is_open()) { // acceptor was closed (stop() closes every listener): nothing to do
+    return;
+  } else if (ec == boost::asio::error::operation_aborted) { // pending accept was canceled (pause() cancels them); unpause() re-arms
+    return;
+  } else if (ec) {
+    ldout(ctx(), 1) << "accept failed: " << ec.message() << dendl;
+    return; // NOTE(review): no new async_accept() is issued on this path - confirm this is intended
+  }
+  auto stream = std::move(l.socket); // move the accepted socket out so l.socket can be handed to the next async_accept
+  stream.set_option(tcp::no_delay(l.use_nodelay), ec); // the result left in ec is not checked
+  l.acceptor.async_accept(l.socket, // re-arm the acceptor before handling this connection
+                          [this, &l] (boost::system::error_code ec) {
+                            accept(l, ec);
+                          });
+  
+  // spawn a coroutine to handle the connection
+#ifdef WITH_RADOSGW_BEAST_OPENSSL
+  if (l.use_ssl) {
+    spawn::spawn(context,
+      [this, s=std::move(stream)] (yield_context yield) mutable {
+        auto conn = boost::intrusive_ptr{new Connection(std::move(s))}; // the Connection now owns the socket
+        auto c = connections.add(*conn); // c is otherwise unused; presumably a guard that unregisters conn on scope exit - TODO confirm
+        // wrap the tcp stream in an ssl stream
+        boost::asio::ssl::stream<tcp_socket&> stream{conn->socket, *ssl_context};
+        auto timeout = timeout_timer{context.get_executor(), request_timeout, conn}; // presumably closes conn when request_timeout expires - TODO confirm
+        // do ssl handshake
+        boost::system::error_code ec;
+        timeout.start(); // the timer runs only for the duration of the handshake
+        auto bytes = stream.async_handshake(ssl::stream_base::server,
+                                            conn->buffer.data(), yield[ec]);
+        timeout.cancel();
+        if (ec) {
+          ldout(ctx(), 1) << "ssl handshake failed: " << ec.message() << dendl;
+          return; // no request handling and no explicit shutdown after a failed handshake
+        }
+        conn->buffer.consume(bytes); // bytes is the handshake's result; presumably the amount of conn->buffer it used - TODO confirm
+        handle_connection(context, env, stream, timeout, header_limit,
+                          conn->buffer, true, pause_mutex, scheduler.get(), // 'true' here vs 'false' in the plain branch: presumably an is-ssl flag - TODO confirm
+                          uri_prefix, ec, yield);
+        if (!ec) {
+          // ssl shutdown (ignoring errors)
+          stream.async_shutdown(yield[ec]);
+        }
+        conn->socket.shutdown(tcp::socket::shutdown_both, ec); // errors left in ec are ignored
+      }, make_stack_allocator());
+  } else {
+#else
+  {
+#endif // WITH_RADOSGW_BEAST_OPENSSL
+    spawn::spawn(context,
+      [this, s=std::move(stream)] (yield_context yield) mutable {
+        auto conn = boost::intrusive_ptr{new Connection(std::move(s))}; // the Connection now owns the socket
+        auto c = connections.add(*conn); // c is otherwise unused; presumably a guard that unregisters conn on scope exit - TODO confirm
+        auto timeout = timeout_timer{context.get_executor(), request_timeout, conn}; // not started here; passed by name to handle_connection
+        boost::system::error_code ec;
+        handle_connection(context, env, conn->socket, timeout, header_limit,
+                          conn->buffer, false, pause_mutex, scheduler.get(),
+                          uri_prefix, ec, yield);
+        conn->socket.shutdown(tcp_socket::shutdown_both, ec); // errors left in ec are ignored
+      }, make_stack_allocator());
+  }
+}
+
+// start rgw_thread_pool_size worker threads that each call context.run().
+// always returns 0.
+int AsioFrontend::run()
+{
+  auto cct = ctx();
+  const int num_workers = cct->_conf->rgw_thread_pool_size;
+  threads.reserve(num_workers);
+
+  ldout(cct, 4) << "frontend spawning " << num_workers << " threads" << dendl;
+
+  // io_context::run() returns as soon as it runs out of work, so keep a
+  // work guard alive; join() resets it to let the workers exit
+  work.emplace(boost::asio::make_work_guard(context));
+
+  // body shared by every worker thread. it is noexcept so that an uncaught
+  // exception terminates the process (with a stacktrace) instead of being
+  // swallowed
+  auto worker = [this]() noexcept {
+    // request warnings on synchronous librados calls in this thread
+    is_asio_thread = true;
+    context.run();
+  };
+
+  for (int remaining = num_workers; remaining > 0; --remaining) {
+    threads.emplace_back(worker);
+  }
+  return 0;
+}
+
+// begin shutdown: set going_down, close every listener's acceptor, close
+// the tracked connections and cancel the pause mutex. error codes from the
+// close calls are not inspected.
+void AsioFrontend::stop()
+{
+  ldout(ctx(), 4) << "frontend initiating shutdown..." << dendl;
+
+  going_down = true;
+
+  boost::system::error_code ignored;
+
+  // stop listening on every endpoint
+  for (auto it = listeners.begin(); it != listeners.end(); ++it) {
+    it->acceptor.close(ignored);
+  }
+
+  // then close the established connections
+  connections.close(ignored);
+  pause_mutex.cancel();
+}
+
+// wait for all worker threads to exit. calls stop() first if going_down
+// has not been set yet.
+void AsioFrontend::join()
+{
+  if (!going_down) {
+    stop();
+  }
+
+  // drop the work guard taken in run() so context.run() can return
+  work.reset();
+
+  ldout(ctx(), 4) << "frontend joining threads..." << dendl;
+  for (auto worker = threads.begin(); worker != threads.end(); ++worker) {
+    worker->join();
+  }
+  ldout(ctx(), 4) << "frontend done" << dendl;
+}
+
+// pause the frontend: cancel pending accepts on every listener, then take
+// the pause mutex. the outcome is only logged.
+void AsioFrontend::pause()
+{
+  ldout(ctx(), 4) << "frontend pausing connections..." << dendl;
+
+  boost::system::error_code ec;
+
+  // cancel pending calls to accept(), but don't close the sockets
+  for (auto it = listeners.begin(); it != listeners.end(); ++it) {
+    it->acceptor.cancel(ec);
+  }
+
+  // pause and wait for outstanding requests to complete
+  pause_mutex.lock(ec);
+
+  if (!ec) {
+    ldout(ctx(), 4) << "frontend paused" << dendl;
+  } else {
+    ldout(ctx(), 1) << "frontend failed to pause: " << ec.message() << dendl;
+  }
+}
+
+// undo pause(): release the pause mutex and start a new async_accept on
+// every listener.
+void AsioFrontend::unpause()
+{
+  // unpause to unblock connections
+  pause_mutex.unlock();
+
+  // start accepting connections again
+  for (auto it = listeners.begin(); it != listeners.end(); ++it) {
+    auto& listener = *it;
+    listener.acceptor.async_accept(
+        listener.socket,
+        [this, &listener] (boost::system::error_code accept_ec) {
+          accept(listener, accept_ec);
+        });
+  }
+
+  ldout(ctx(), 4) << "frontend unpaused" << dendl;
+}
+
+} // anonymous namespace
+
+// implementation type behind RGWAsioFrontend. it derives from AsioFrontend
+// and only forwards its constructor arguments to that base class.
+class RGWAsioFrontend::Impl : public AsioFrontend {
+ public:
+  Impl(RGWProcessEnv& env,
+       RGWFrontendConfig* conf,
+       rgw::dmclock::SchedulerCtx& sched_ctx)
+    : AsioFrontend(env, conf, sched_ctx)
+  {
+  }
+};
+
+// construct the frontend by creating its Impl from the same arguments
+RGWAsioFrontend::RGWAsioFrontend(RGWProcessEnv& env,
+                                 RGWFrontendConfig* conf,
+                                 rgw::dmclock::SchedulerCtx& sched_ctx)
+  : impl(new Impl(env, conf, sched_ctx))
+{}
+
+RGWAsioFrontend::~RGWAsioFrontend() = default; // defaulted after Impl's definition; presumably so impl is destroyed where Impl is complete - confirm against the header
+
+// forward to Impl::init() and return its result unchanged
+int RGWAsioFrontend::init()
+{
+  const int result = impl->init();
+  return result;
+}
+
+// forward to Impl::run() and return its result unchanged
+int RGWAsioFrontend::run()
+{
+  const int result = impl->run();
+  return result;
+}
+
+// forward to Impl::stop()
+void RGWAsioFrontend::stop()
+{
+  auto& frontend = *impl;
+  frontend.stop();
+}
+
+// forward to Impl::join()
+void RGWAsioFrontend::join()
+{
+  auto& frontend = *impl;
+  frontend.join();
+}
+
+// pause the frontend ahead of a configuration change; forwards to Impl::pause()
+void RGWAsioFrontend::pause_for_new_config()
+{
+  auto& frontend = *impl;
+  frontend.pause();
+}
+
+// resume the frontend after a configuration change; forwards to Impl::unpause()
+void RGWAsioFrontend::unpause_with_new_config()
+{
+  auto& frontend = *impl;
+  frontend.unpause();
+}