+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include <errno.h>
+#include <limits.h>
+#include <boost/algorithm/string.hpp>
+#include <boost/tokenizer.hpp>
+#include "common/Formatter.h"
+#include "common/HTMLFormatter.h"
+#include "common/utf8.h"
+#include "include/str_list.h"
+#include "rgw_common.h"
+#include "rgw_rados.h"
+#include "rgw_zone.h"
+#include "rgw_auth_s3.h"
+#include "rgw_formats.h"
+#include "rgw_op.h"
+#include "rgw_rest.h"
+#include "rgw_rest_swift.h"
+#include "rgw_rest_s3.h"
+#include "rgw_swift_auth.h"
+#include "rgw_cors_s3.h"
+#include "rgw_perf_counters.h"
+#include "rgw_client_io.h"
+#include "rgw_resolve.h"
+#include <numeric>
+#define dout_subsys ceph_subsys_rgw
/*
 * One row of the HTTP status table: a numeric status code and the reason
 * phrase that is sent along with it.
 */
struct rgw_http_status_code {
  int code;
  const char *name;
};

/*
 * Status codes this file can name. The table ends with a { 0, nullptr }
 * sentinel so it can be walked without a separate element count.
 */
const static struct rgw_http_status_code http_codes[] = {
  { 100, "Continue" },
  { 200, "OK" },
  { 201, "Created" },
  { 202, "Accepted" },
  { 204, "No Content" },
  { 205, "Reset Content" },
  { 206, "Partial Content" },
  { 207, "Multi Status" },
  { 208, "Already Reported" },
  { 300, "Multiple Choices" },
  { 301, "Moved Permanently" },
  { 302, "Found" },
  { 303, "See Other" },
  { 304, "Not Modified" },
  { 305, "Use Proxy" },  /* was misspelled "User Proxy" */
  { 306, "Switch Proxy" },
  { 307, "Temporary Redirect" },
  { 308, "Permanent Redirect" },
  { 400, "Bad Request" },
  { 401, "Unauthorized" },
  { 402, "Payment Required" },
  { 403, "Forbidden" },
  { 404, "Not Found" },
  { 405, "Method Not Allowed" },
  { 406, "Not Acceptable" },
  { 407, "Proxy Authentication Required" },
  { 408, "Request Timeout" },
  { 409, "Conflict" },
  { 410, "Gone" },
  { 411, "Length Required" },
  { 412, "Precondition Failed" },
  { 413, "Request Entity Too Large" },
  { 414, "Request-URI Too Long" },
  { 415, "Unsupported Media Type" },
  { 416, "Requested Range Not Satisfiable" },
  { 417, "Expectation Failed" },
  { 422, "Unprocessable Entity" },
  { 498, "Rate Limited"},
  { 500, "Internal Server Error" },
  { 501, "Not Implemented" },
  { 503, "Slow Down"},
  { 0, nullptr },
};
+struct rgw_http_attr {
+ const char *rgw_attr;
+ const char *http_attr;
+/*
+ * mapping between rgw object attrs and output http fields
+ */
+static const struct rgw_http_attr base_rgw_to_http_attrs[] = {
+ { RGW_ATTR_CONTENT_LANG, "Content-Language" },
+ { RGW_ATTR_EXPIRES, "Expires" },
+ { RGW_ATTR_CACHE_CONTROL, "Cache-Control" },
+ { RGW_ATTR_CONTENT_DISP, "Content-Disposition" },
+ { RGW_ATTR_CONTENT_ENC, "Content-Encoding" },
+ { RGW_ATTR_USER_MANIFEST, "X-Object-Manifest" },
+ { RGW_ATTR_X_ROBOTS_TAG , "X-Robots-Tag" },
+ { RGW_ATTR_STORAGE_CLASS , "X-Amz-Storage-Class" },
+ /* RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION header depends on access mode:
+ * S3 endpoint: x-amz-website-redirect-location
+ * S3Website endpoint: Location
+ */
+ { RGW_ATTR_AMZ_WEBSITE_REDIRECT_LOCATION, "x-amz-website-redirect-location" },
+struct generic_attr {
+ const char *http_header;
+ const char *rgw_attr;
+/*
+ * mapping between http env fields and rgw object attrs
+ */
+static const struct generic_attr generic_attrs[] = {
+map<string, string> rgw_to_http_attrs;
+static map<string, string> generic_attrs_map;
+map<int, const char *> http_status_names;
/*
 * make attrs look_like_this
 * converts dashes to underscores and lower-cases everything else
 *
 * orig: attribute name as given, e.g. "Content-Type"
 * returns: a converted copy of the same length, e.g. "content_type"
 */
std::string lowercase_underscore_http_attr(const std::string& orig)
{
  /* work on a std::string copy instead of a stack array sized at run time */
  std::string converted(orig);
  for (char& c : converted) {
    if (c == '-') {
      c = '_';
    } else {
      /* tolower() is undefined for negative values other than EOF, so the
       * byte is passed as unsigned char */
      c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
    }
  }
  return converted;
}
/*
 * make attrs LOOK_LIKE_THIS
 * converts dashes to underscores and upper-cases everything else
 *
 * orig: attribute name as given, e.g. "Content-Type"
 * returns: a converted copy of the same length, e.g. "CONTENT_TYPE"
 */
std::string uppercase_underscore_http_attr(const std::string& orig)
{
  /* work on a std::string copy instead of a stack array sized at run time */
  std::string converted(orig);
  for (char& c : converted) {
    if (c == '-') {
      c = '_';
    } else {
      /* toupper() is undefined for negative values other than EOF, so the
       * byte is passed as unsigned char */
      c = static_cast<char>(toupper(static_cast<unsigned char>(c)));
    }
  }
  return converted;
}
+/* avoid duplicate hostnames in hostnames lists */
+static set<string> hostnames_set;
+static set<string> hostnames_s3website_set;
+void rgw_rest_init(CephContext *cct, RGWRados *store, const RGWZoneGroup& zone_group)
+ for (const auto& rgw2http : base_rgw_to_http_attrs) {
+ rgw_to_http_attrs[rgw2http.rgw_attr] = rgw2http.http_attr;
+ }
+ for (const auto& http2rgw : generic_attrs) {
+ generic_attrs_map[http2rgw.http_header] = http2rgw.rgw_attr;
+ }
+ list<string> extended_http_attrs;
+ get_str_list(cct->_conf->rgw_extended_http_attrs, extended_http_attrs);
+ list<string>::iterator iter;
+ for (iter = extended_http_attrs.begin(); iter != extended_http_attrs.end(); ++iter) {
+ string rgw_attr = RGW_ATTR_PREFIX;
+ rgw_attr.append(lowercase_underscore_http_attr(*iter));
+ rgw_to_http_attrs[rgw_attr] = camelcase_dash_http_attr(*iter);
+ string http_header = "HTTP_";
+ http_header.append(uppercase_underscore_http_attr(*iter));
+ generic_attrs_map[http_header] = rgw_attr;
+ }
+ for (const struct rgw_http_status_code *h = http_codes; h->code; h++) {
+ http_status_names[h->code] = h->name;
+ }
+ hostnames_set.insert(cct->_conf->rgw_dns_name);
+ hostnames_set.insert(zone_group.hostnames.begin(), zone_group.hostnames.end());
+ hostnames_set.erase(""); // filter out empty hostnames
+ ldout(cct, 20) << "RGW hostnames: " << hostnames_set << dendl;
+ /* TODO: We should have a sanity check that no hostname matches the end of
+ * any other hostname, otherwise we will get ambiguous results from
+ * rgw_find_host_in_domains.
+ * Eg:
+ * Hostnames: [A, B.A]
+ * Inputs: [Z.A, X.B.A]
+ * Z.A clearly splits to subdomain=Z, domain=A
+ * X.B.A ambiguously splits to both {X, B.A} and {X.B, A}
+ */
+ hostnames_s3website_set.insert(cct->_conf->rgw_dns_s3website_name);
+ hostnames_s3website_set.insert(zone_group.hostnames_s3website.begin(), zone_group.hostnames_s3website.end());
+ hostnames_s3website_set.erase(""); // filter out empty hostnames
+ ldout(cct, 20) << "RGW S3website hostnames: " << hostnames_s3website_set << dendl;
+ /* TODO: we should repeat the hostnames_set sanity check here
+ * and ALSO decide about overlap, if any
+ */
+static bool str_ends_with_nocase(const string& s, const string& suffix, size_t *pos) // case-insensitive check that s ends with suffix; optionally reports where the suffix would start
+ size_t len = suffix.size();
+ if (len > (size_t)s.size()) {
+ return false; // suffix is longer than s: *pos is left untouched
+ }
+ ssize_t p = s.size() - len; // offset in s at which a suffix of this length begins
+ if (pos) {
+ *pos = p; // written before the comparison, so it is set even when the result is false
+ }
+ return boost::algorithm::iends_with(s, suffix);
+static bool rgw_find_host_in_domains(const string& host, string *domain, string *subdomain, // splits host into *domain / *subdomain using the first usable entry of valid_hostnames_set
+ const set<string>& valid_hostnames_set)
+ set<string>::iterator iter;
+ /** TODO, Future optimization
+ * store hostnames_set elements _reversed_, and look for a prefix match,
+ * which is much faster than a suffix match.
+ */
+ for (iter = valid_hostnames_set.begin(); iter != valid_hostnames_set.end(); ++iter) {
+ size_t pos;
+ if (!str_ends_with_nocase(host, *iter, &pos))
+ continue;
+ if (pos == 0) { // host has the same length as the entry, i.e. it is the entry itself (ignoring case)
+ *domain = host;
+ subdomain->clear();
+ } else {
+ if (host[pos - 1] != '.') { // the matched suffix must start right after a dot
+ continue;
+ }
+ *domain = host.substr(pos);
+ *subdomain = host.substr(0, pos - 1); // everything before the separating dot
+ }
+ return true; // stops at the first entry that matches
+ }
+ return false; // nothing matched: *domain and *subdomain are not modified
+static void dump_status(struct req_state *s, int status,
+ const char *status_name)
+ s->formatter->set_status(status, status_name);
+ try {
+ RESTFUL_IO(s)->send_status(status, status_name);
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: s->cio->send_status() returned err="
+ << e.what() << dendl;
+ }
+void rgw_flush_formatter_and_reset(struct req_state *s, Formatter *formatter)
+ std::ostringstream oss;
+ formatter->output_footer();
+ formatter->flush(oss);
+ std::string outs(oss.str());
+ if (!outs.empty() && s->op != OP_HEAD) {
+ dump_body(s, outs);
+ }
+ s->formatter->reset();
+void rgw_flush_formatter(struct req_state *s, Formatter *formatter)
+ std::ostringstream oss;
+ formatter->flush(oss);
+ std::string outs(oss.str());
+ if (!outs.empty() && s->op != OP_HEAD) {
+ dump_body(s, outs);
+ }
+void dump_errno(int http_ret, string& out) {
+ stringstream ss;
+ ss << http_ret << " " << http_status_names[http_ret];
+ out = ss.str();
+void dump_errno(const struct rgw_err &err, string& out) {
+ dump_errno(err.http_ret, out);
+void dump_errno(struct req_state *s)
+ dump_status(s, s->err.http_ret, http_status_names[s->err.http_ret]);
+void dump_errno(struct req_state *s, int http_ret)
+ dump_status(s, http_ret, http_status_names[http_ret]);
+void dump_header(struct req_state* const s,
+ const boost::string_ref& name,
+ const boost::string_ref& val)
+ try {
+ RESTFUL_IO(s)->send_header(name, val);
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: s->cio->send_header() returned err="
+ << e.what() << dendl;
+ }
+void dump_header(struct req_state* const s,
+ const boost::string_ref& name,
+ ceph::buffer::list& bl)
+ return dump_header(s, name, rgw_sanitized_hdrval(bl));
+void dump_header(struct req_state* const s,
+ const boost::string_ref& name,
+ const long long val)
+ char buf[32];
+ const auto len = snprintf(buf, sizeof(buf), "%lld", val);
+ return dump_header(s, name, boost::string_ref(buf, len));
+void dump_header(struct req_state* const s,
+ const boost::string_ref& name,
+ const utime_t& ut)
+ char buf[32];
+ const auto len = snprintf(buf, sizeof(buf), "%lld.%05d",
+ static_cast<long long>(ut.sec()),
+ static_cast<int>(ut.usec() / 10));
+ return dump_header(s, name, boost::string_ref(buf, len));
+void dump_content_length(struct req_state* const s, const uint64_t len)
+ try {
+ RESTFUL_IO(s)->send_content_length(len);
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: s->cio->send_content_length() returned err="
+ << e.what() << dendl;
+ }
+ dump_header(s, "Accept-Ranges", "bytes");
+static void dump_chunked_encoding(struct req_state* const s)
+ try {
+ RESTFUL_IO(s)->send_chunked_transfer_encoding();
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->send_chunked_transfer_encoding()"
+ << " returned err=" << e.what() << dendl;
+ }
+void dump_etag(struct req_state* const s,
+ const boost::string_ref& etag,
+ const bool quoted)
+ if (etag.empty()) {
+ return;
+ }
+ if (s->prot_flags & RGW_REST_SWIFT && ! quoted) {
+ return dump_header(s, "etag", etag);
+ } else {
+ return dump_header_quoted(s, "ETag", etag);
+ }
+void dump_bucket_from_state(struct req_state *s)
+ if (g_conf()->rgw_expose_bucket && ! s->bucket_name.empty()) {
+ if (! s->bucket_tenant.empty()) {
+ dump_header(s, "Bucket",
+ url_encode(s->bucket_tenant + "/" + s->bucket_name));
+ } else {
+ dump_header(s, "Bucket", url_encode(s->bucket_name));
+ }
+ }
+void dump_redirect(struct req_state * const s, const std::string& redirect)
+ return dump_header_if_nonempty(s, "Location", redirect);
+static size_t dump_time_header_impl(char (&timestr)[TIME_BUF_SIZE], // formats t into timestr; returns the strftime() result, or 0 when gmtime_r() fails
+ const real_time t)
+ const utime_t ut(t);
+ time_t secs = static_cast<time_t>(ut.sec()); // whole seconds only; any sub-second part of t is not used
+ struct tm result;
+ const struct tm * const tmp = gmtime_r(&secs, &result); // broken-down UTC time, stored in the local `result`
+ if (tmp == nullptr) {
+ return 0; // conversion failed: timestr has not been written
+ }
+ return strftime(timestr, sizeof(timestr),
+ "%a, %d %b %Y %H:%M:%S %Z", tmp); // weekday, day month year, time, zone name
+void dump_time_header(struct req_state *s, const char *name, real_time t)
+ char timestr[TIME_BUF_SIZE];
+ const size_t len = dump_time_header_impl(timestr, t);
+ if (len == 0) {
+ return;
+ }
+ return dump_header(s, name, boost::string_ref(timestr, len));
+std::string dump_time_to_str(const real_time& t)
+ char timestr[TIME_BUF_SIZE];
+ dump_time_header_impl(timestr, t);
+ return timestr;
+void dump_last_modified(struct req_state *s, real_time t)
+ dump_time_header(s, "Last-Modified", t);
+void dump_epoch_header(struct req_state *s, const char *name, real_time t)
+ utime_t ut(t);
+ char buf[65];
+ const auto len = snprintf(buf, sizeof(buf), "%lld.%09lld",
+ (long long)ut.sec(),
+ (long long)ut.nsec());
+ return dump_header(s, name, boost::string_ref(buf, len));
+void dump_time(struct req_state *s, const char *name, real_time *t)
+ char buf[TIME_BUF_SIZE];
+ rgw_to_iso8601(*t, buf, sizeof(buf));
+ s->formatter->dump_string(name, buf);
+void dump_owner(struct req_state *s, const rgw_user& id, string& name,
+ const char *section)
+ if (!section)
+ section = "Owner";
+ s->formatter->open_object_section(section);
+ s->formatter->dump_string("ID", id.to_str());
+ s->formatter->dump_string("DisplayName", name);
+ s->formatter->close_section();
+void dump_access_control(struct req_state *s, const char *origin,
+ const char *meth,
+ const char *hdr, const char *exp_hdr,
+ uint32_t max_age) {
+ if (origin && (origin[0] != '\0')) {
+ dump_header(s, "Access-Control-Allow-Origin", origin);
+ /* If the server specifies an origin host rather than "*",
+ * then it must also include Origin in the Vary response header
+ * to indicate to clients that server responses will differ
+ * based on the value of the Origin request header.
+ */
+ if (strcmp(origin, "*") != 0) {
+ dump_header(s, "Vary", "Origin");
+ }
+ if (meth && (meth[0] != '\0')) {
+ dump_header(s, "Access-Control-Allow-Methods", meth);
+ }
+ if (hdr && (hdr[0] != '\0')) {
+ dump_header(s, "Access-Control-Allow-Headers", hdr);
+ }
+ if (exp_hdr && (exp_hdr[0] != '\0')) {
+ dump_header(s, "Access-Control-Expose-Headers", exp_hdr);
+ }
+ if (max_age != CORS_MAX_AGE_INVALID) {
+ dump_header(s, "Access-Control-Max-Age", max_age);
+ }
+ }
+void dump_access_control(req_state *s, RGWOp *op)
+ string origin;
+ string method;
+ string header;
+ string exp_header;
+ unsigned max_age = CORS_MAX_AGE_INVALID;
+ if (!op->generate_cors_headers(origin, method, header, exp_header, &max_age))
+ return;
+ dump_access_control(s, origin.c_str(), method.c_str(), header.c_str(),
+ exp_header.c_str(), max_age);
+void dump_start(struct req_state *s)
+ if (!s->content_started) {
+ s->formatter->output_header();
+ s->content_started = true;
+ }
+void dump_trans_id(req_state *s)
+ if (s->prot_flags & RGW_REST_SWIFT) {
+ dump_header(s, "X-Trans-Id", s->trans_id);
+ dump_header(s, "X-Openstack-Request-Id", s->trans_id);
+ } else if (s->trans_id.length()) {
+ dump_header(s, "x-amz-request-id", s->trans_id);
+ }
+void end_header(struct req_state* s, RGWOp* op, const char *content_type,
+ const int64_t proposed_content_length, bool force_content_type,
+ bool force_no_error)
+ string ctype;
+ dump_trans_id(s);
+ if ((!s->is_err()) &&
+ (s->bucket_info.owner != s->user->user_id) &&
+ (s->bucket_info.requester_pays)) {
+ dump_header(s, "x-amz-request-charged", "requester");
+ }
+ if (op) {
+ dump_access_control(s, op);
+ }
+ if (s->prot_flags & RGW_REST_SWIFT && !content_type) {
+ force_content_type = true;
+ }
+ /* do not send content type if content length is zero
+ and the content type was not set by the user */
+ if (force_content_type ||
+ (!content_type && s->formatter->get_len() != 0) || s->is_err()){
+ switch (s->format) {
+ ctype = "application/xml";
+ break;
+ ctype = "application/json";
+ break;
+ ctype = "text/html";
+ break;
+ default:
+ ctype = "text/plain";
+ break;
+ }
+ if (s->prot_flags & RGW_REST_SWIFT)
+ ctype.append("; charset=utf-8");
+ content_type = ctype.c_str();
+ }
+ if (!force_no_error && s->is_err()) {
+ dump_start(s);
+ dump(s);
+ dump_content_length(s, s->formatter->get_len());
+ } else {
+ if (proposed_content_length == CHUNKED_TRANSFER_ENCODING) {
+ dump_chunked_encoding(s);
+ } else if (proposed_content_length != NO_CONTENT_LENGTH) {
+ dump_content_length(s, proposed_content_length);
+ }
+ }
+ if (content_type) {
+ dump_header(s, "Content-Type", content_type);
+ }
+ dump_header_if_nonempty(s, "Server", g_conf()->rgw_service_provider_name);
+ try {
+ RESTFUL_IO(s)->complete_header();
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->complete_header() returned err="
+ << e.what() << dendl;
+ }
+ ACCOUNTING_IO(s)->set_account(true);
+ rgw_flush_formatter_and_reset(s, s->formatter);
+static void build_redirect_url(req_state *s, const string& redirect_base, string *redirect_url)
+ string& dest_uri = *redirect_url;
+ dest_uri = redirect_base;
+ /*
+ * reqest_uri is always start with slash, so we need to remove
+ * the unnecessary slash at the end of dest_uri.
+ */
+ if (dest_uri[dest_uri.size() - 1] == '/') {
+ dest_uri = dest_uri.substr(0, dest_uri.size() - 1);
+ }
+ dest_uri += s->info.request_uri;
+ dest_uri += "?";
+ dest_uri += s->info.request_params;
+void abort_early(struct req_state *s, RGWOp* op, int err_no,
+ RGWHandler* handler)
+ string error_content("");
+ if (!s->formatter) {
+ s->formatter = new JSONFormatter;
+ s->format = RGW_FORMAT_JSON;
+ }
+ // op->error_handler is responsible for calling its handler's error_handler
+ if (op != NULL) {
+ int new_err_no;
+ new_err_no = op->error_handler(err_no, &error_content);
+ ldout(s->cct, 20) << "op->ERRORHANDLER: err_no=" << err_no
+ << " new_err_no=" << new_err_no << dendl;
+ err_no = new_err_no;
+ } else if (handler != NULL) {
+ int new_err_no;
+ new_err_no = handler->error_handler(err_no, &error_content);
+ ldout(s->cct, 20) << "handler->ERRORHANDLER: err_no=" << err_no
+ << " new_err_no=" << new_err_no << dendl;
+ err_no = new_err_no;
+ }
+ // If the error handler(s) above dealt with it completely, they should have
+ // returned 0. If non-zero, we need to continue here.
+ if (err_no) {
+ // Watch out, we might have a custom error state already set!
+ if (!s->err.http_ret || s->err.http_ret == 200) {
+ set_req_state_err(s, err_no);
+ }
+ if (s->err.http_ret == 404 && !s->redirect_zone_endpoint.empty()) {
+ s->err.http_ret = 301;
+ build_redirect_url(s, s->redirect_zone_endpoint, &s->redirect);
+ }
+ dump_errno(s);
+ dump_bucket_from_state(s);
+ if (err_no == -ERR_PERMANENT_REDIRECT || err_no == -ERR_WEBSITE_REDIRECT) {
+ string dest_uri;
+ if (!s->redirect.empty()) {
+ dest_uri = s->redirect;
+ } else if (!s->zonegroup_endpoint.empty()) {
+ build_redirect_url(s, s->zonegroup_endpoint, &dest_uri);
+ }
+ if (!dest_uri.empty()) {
+ dump_redirect(s, dest_uri);
+ }
+ }
+ if (!error_content.empty()) {
+ /*
+ * TODO we must add all error entries as headers here:
+ * when having a working errordoc, then the s3 error fields are
+ * rendered as HTTP headers, e.g.:
+ * x-amz-error-code: NoSuchKey
+ * x-amz-error-message: The specified key does not exist.
+ * x-amz-error-detail-Key: foo
+ */
+ end_header(s, op, NULL, error_content.size(), false, true);
+ RESTFUL_IO(s)->send_body(error_content.c_str(), error_content.size());
+ } else {
+ end_header(s, op);
+ }
+ }
+ perfcounter->inc(l_rgw_failed_req);
+void dump_continue(struct req_state * const s)
+ try {
+ RESTFUL_IO(s)->send_100_continue();
+ } catch (rgw::io::Exception& e) {
+ ldout(s->cct, 0) << "ERROR: RESTFUL_IO(s)->send_100_continue() returned err="
+ << e.what() << dendl;
+ }
+void dump_range(struct req_state* const s,
+ const uint64_t ofs,
+ const uint64_t end,
+ const uint64_t total)
+ /* dumping range into temp buffer first, as libfcgi will fail to digest
+ * %lld */
+ char range_buf[128];
+ size_t len;
+ if (! total) {
+ len = snprintf(range_buf, sizeof(range_buf), "bytes */%lld",
+ static_cast<long long>(total));
+ } else {
+ len = snprintf(range_buf, sizeof(range_buf), "bytes %lld-%lld/%lld",
+ static_cast<long long>(ofs),
+ static_cast<long long>(end),
+ static_cast<long long>(total));
+ }
+ return dump_header(s, "Content-Range", boost::string_ref(range_buf, len));
+int dump_body(struct req_state* const s,
+ const char* const buf,
+ const size_t len)
+ try {
+ return RESTFUL_IO(s)->send_body(buf, len);
+ } catch (rgw::io::Exception& e) {
+ return -e.code().value();
+ }
+int dump_body(struct req_state* const s, /* const */ ceph::buffer::list& bl)
+ return dump_body(s, bl.c_str(), bl.length());
+int dump_body(struct req_state* const s, const std::string& str)
+ return dump_body(s, str.c_str(), str.length());
+int recv_body(struct req_state* const s,
+ char* const buf,
+ const size_t max)
+ try {
+ return RESTFUL_IO(s)->recv_body(buf, max);
+ } catch (rgw::io::Exception& e) {
+ return -e.code().value();
+ }
+int RGWGetObj_ObjStore::get_params()
+ range_str = s->info.env->get("HTTP_RANGE");
+ if_mod = s->info.env->get("HTTP_IF_MODIFIED_SINCE");
+ if_unmod = s->info.env->get("HTTP_IF_UNMODIFIED_SINCE");
+ if_match = s->info.env->get("HTTP_IF_MATCH");
+ if_nomatch = s->info.env->get("HTTP_IF_NONE_MATCH");
+ if (s->system_request) {
+ mod_zone_id = s->info.env->get_int("HTTP_DEST_ZONE_SHORT_ID", 0);
+ mod_pg_ver = s->info.env->get_int("HTTP_DEST_PG_VER", 0);
+ rgwx_stat = s->info.args.exists(RGW_SYS_PARAM_PREFIX "stat");
+ get_data &= (!rgwx_stat);
+ }
+ if (s->info.args.exists(GET_TORRENT)) {
+ return torrent.get_params();
+ }
+ return 0;
+int RESTArgs::get_string(struct req_state *s, const string& name,
+ const string& def_val, string *val, bool *existed)
+ bool exists;
+ *val = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *val = def_val;
+ return 0;
+ }
+ return 0;
+int RESTArgs::get_uint64(struct req_state *s, const string& name,
+ uint64_t def_val, uint64_t *val, bool *existed)
+ bool exists;
+ string sval = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *val = def_val;
+ return 0;
+ }
+ int r = stringtoull(sval, val);
+ if (r < 0)
+ return r;
+ return 0;
+int RESTArgs::get_int64(struct req_state *s, const string& name,
+ int64_t def_val, int64_t *val, bool *existed)
+ bool exists;
+ string sval = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *val = def_val;
+ return 0;
+ }
+ int r = stringtoll(sval, val);
+ if (r < 0)
+ return r;
+ return 0;
+int RESTArgs::get_uint32(struct req_state *s, const string& name,
+ uint32_t def_val, uint32_t *val, bool *existed)
+ bool exists;
+ string sval = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *val = def_val;
+ return 0;
+ }
+ int r = stringtoul(sval, val);
+ if (r < 0)
+ return r;
+ return 0;
+int RESTArgs::get_int32(struct req_state *s, const string& name,
+ int32_t def_val, int32_t *val, bool *existed)
+ bool exists;
+ string sval = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *val = def_val;
+ return 0;
+ }
+ int r = stringtol(sval, val);
+ if (r < 0)
+ return r;
+ return 0;
+int RESTArgs::get_time(struct req_state *s, const string& name,
+ const utime_t& def_val, utime_t *val, bool *existed)
+ bool exists;
+ string sval = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *val = def_val;
+ return 0;
+ }
+ uint64_t epoch, nsec;
+ int r = utime_t::parse_date(sval, &epoch, &nsec);
+ if (r < 0)
+ return r;
+ *val = utime_t(epoch, nsec);
+ return 0;
+int RESTArgs::get_epoch(struct req_state *s, const string& name, uint64_t def_val, uint64_t *epoch, bool *existed)
+ bool exists;
+ string date = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *epoch = def_val;
+ return 0;
+ }
+ int r = utime_t::parse_date(date, epoch, NULL);
+ if (r < 0)
+ return r;
+ return 0;
+int RESTArgs::get_bool(struct req_state *s, const string& name, bool def_val, bool *val, bool *existed)
+ bool exists;
+ string sval = s->info.args.get(name, &exists);
+ if (existed)
+ *existed = exists;
+ if (!exists) {
+ *val = def_val;
+ return 0;
+ }
+ const char *str = sval.c_str();
+ if (sval.empty() ||
+ strcasecmp(str, "true") == 0 ||
+"1") == 0) {
+ *val = true;
+ return 0;
+ }
+ if (strcasecmp(str, "false") != 0 &&
+"0") != 0) {
+ *val = def_val;
+ return -EINVAL;
+ }
+ *val = false;
+ return 0;
+void RGWRESTFlusher::do_start(int ret)
+ set_req_state_err(s, ret); /* no going back from here */
+ dump_errno(s);
+ dump_start(s);
+ end_header(s, op);
+ rgw_flush_formatter_and_reset(s, s->formatter);
+void RGWRESTFlusher::do_flush()
+ rgw_flush_formatter(s, s->formatter);
+int RGWPutObj_ObjStore::verify_params()
+ if (s->length) {
+ off_t len = atoll(s->length);
+ if (len > (off_t)(s->cct->_conf->rgw_max_put_size)) {
+ return -ERR_TOO_LARGE;
+ }
+ }
+ return 0;
+int RGWPutObj_ObjStore::get_params()
+ /* start gettorrent */
+ if (s->cct->_conf->rgw_torrent_flag)
+ {
+ int ret = 0;
+ ret = torrent.get_params();
+ ldout(s->cct, 5) << "NOTICE: open produce torrent file " << dendl;
+ if (ret < 0)
+ {
+ return ret;
+ }
+ torrent.set_info_name((s->object).name);
+ }
+ /* end gettorrent */
+ supplied_md5_b64 = s->info.env->get("HTTP_CONTENT_MD5");
+ return 0;
+int RGWPutObj_ObjStore::get_data(bufferlist& bl)
+ size_t cl;
+ uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+ if (s->length) {
+ cl = atoll(s->length) - ofs;
+ if (cl > chunk_size)
+ cl = chunk_size;
+ } else {
+ cl = chunk_size;
+ }
+ int len = 0;
+ {
+ ACCOUNTING_IO(s)->set_account(true);
+ bufferptr bp(cl);
+ const auto read_len = recv_body(s, bp.c_str(), cl);
+ if (read_len < 0) {
+ return read_len;
+ }
+ len = read_len;
+ bl.append(bp, 0, len);
+ ACCOUNTING_IO(s)->set_account(false);
+ }
+ if ((uint64_t)ofs + len > s->cct->_conf->rgw_max_put_size) {
+ return -ERR_TOO_LARGE;
+ }
+ return len;
+/*
+ * parses params in the format: 'first; param1=foo; param2=bar'
+ */
+void RGWPostObj_ObjStore::parse_boundary_params(const std::string& params_str,
+ std::string& first,
+ std::map<std::string,
+ std::string>& params)
+ size_t pos = params_str.find(';');
+ if (std::string::npos == pos) {
+ first = rgw_trim_whitespace(params_str);
+ return;
+ }
+ first = rgw_trim_whitespace(params_str.substr(0, pos));
+ pos++;
+ while (pos < params_str.size()) {
+ size_t end = params_str.find(';', pos);
+ if (std::string::npos == end) {
+ end = params_str.size();
+ }
+ std::string param = params_str.substr(pos, end - pos);
+ size_t eqpos = param.find('=');
+ if (std::string::npos != eqpos) {
+ std::string param_name = rgw_trim_whitespace(param.substr(0, eqpos));
+ std::string val = rgw_trim_quotes(param.substr(eqpos + 1));
+ params[std::move(param_name)] = std::move(val);
+ } else {
+ params[rgw_trim_whitespace(param)] = "";
+ }
+ pos = end + 1;
+ }
+int RGWPostObj_ObjStore::parse_part_field(const std::string& line,
+ std::string& field_name, /* out */
+ post_part_field& field) /* out */
+ size_t pos = line.find(':');
+ if (pos == string::npos)
+ return -EINVAL;
+ field_name = line.substr(0, pos);
+ if (pos >= line.size() - 1)
+ return 0;
+ parse_boundary_params(line.substr(pos + 1), field.val, field.params);
+ return 0;
+static bool is_crlf(const char *s) // true when s points at a "\r\n" pair
+ return (*s == '\r' && *(s + 1) == '\n'); // s[1] is only read when s[0] is '\r'
+/*
+ * find the index of the boundary, if it exists, or optionally the next end of line;
+ * also returns how many bytes to skip
+ */
+static int index_of(ceph::bufferlist& bl,
+ uint64_t max_len,
+ const std::string& str,
+ const bool check_crlf,
+ bool& reached_boundary,
+ int& skip)
+ reached_boundary = false;
+ skip = 0;
+ if (str.size() < 2) // we assume boundary is at least 2 chars (makes it easier with crlf checks)
+ return -EINVAL;
+ if (bl.length() < str.size())
+ return -1;
+ const char *buf = bl.c_str();
+ const char *s = str.c_str();
+ if (max_len > bl.length())
+ max_len = bl.length();
+ for (uint64_t i = 0; i < max_len; i++, buf++) {
+ if (check_crlf &&
+ i >= 1 &&
+ is_crlf(buf - 1)) {
+ return i + 1; // skip the crlf
+ }
+ if ((i < max_len - str.size() + 1) &&
+ (buf[0] == s[0] && buf[1] == s[1]) &&
+ (strncmp(buf, s, str.size()) == 0)) {
+ reached_boundary = true;
+ skip = str.size();
+ /* oh, great, now we need to swallow the preceding crlf
+ * if exists
+ */
+ if ((i >= 2) &&
+ is_crlf(buf - 2)) {
+ i -= 2;
+ skip += 2;
+ }
+ return i;
+ }
+ }
+ return -1;
+int RGWPostObj_ObjStore::read_with_boundary(ceph::bufferlist& bl,
+ uint64_t max,
+ const bool check_crlf,
+ bool& reached_boundary,
+ bool& done)
+ uint64_t cl = max + 2 + boundary.size();
+ if (max > in_data.length()) {
+ uint64_t need_to_read = cl - in_data.length();
+ bufferptr bp(need_to_read);
+ const auto read_len = recv_body(s, bp.c_str(), need_to_read);
+ if (read_len < 0) {
+ return read_len;
+ }
+ in_data.append(bp, 0, read_len);
+ }
+ done = false;
+ int skip;
+ const int index = index_of(in_data, cl, boundary, check_crlf,
+ reached_boundary, skip);
+ if (index >= 0) {
+ max = index;
+ }
+ if (max > in_data.length()) {
+ max = in_data.length();
+ }
+ bl.substr_of(in_data, 0, max);
+ ceph::bufferlist new_read_data;
+ /*
+ * now we need to skip boundary for next time, also skip any crlf, or
+ * check to see if it's the last final boundary (marked with "--" at the end
+ */
+ if (reached_boundary) {
+ int left = in_data.length() - max;
+ if (left < skip + 2) {
+ int need = skip + 2 - left;
+ bufferptr boundary_bp(need);
+ const int r = recv_body(s, boundary_bp.c_str(), need);
+ if (r < 0) {
+ return r;
+ }
+ in_data.append(boundary_bp);
+ }
+ max += skip; // skip boundary for next time
+ if (in_data.length() >= max + 2) {
+ const char *data = in_data.c_str();
+ if (is_crlf(data + max)) {
+ max += 2;
+ } else {
+ if (*(data + max) == '-' &&
+ *(data + max + 1) == '-') {
+ done = true;
+ max += 2;
+ }
+ }
+ }
+ }
+ new_read_data.substr_of(in_data, max, in_data.length() - max);
+ in_data = new_read_data;
+ return 0;
+int RGWPostObj_ObjStore::read_line(ceph::bufferlist& bl,
+ const uint64_t max,
+ bool& reached_boundary,
+ bool& done)
+ return read_with_boundary(bl, max, true, reached_boundary, done);
+int RGWPostObj_ObjStore::read_data(ceph::bufferlist& bl,
+ const uint64_t max,
+ bool& reached_boundary,
+ bool& done)
+ return read_with_boundary(bl, max, false, reached_boundary, done);
+int RGWPostObj_ObjStore::read_form_part_header(struct post_form_part* const part,
+ bool& done)
+ bufferlist bl;
+ bool reached_boundary;
+ uint64_t chunk_size = s->cct->_conf->rgw_max_chunk_size;
+ int r = read_line(bl, chunk_size, reached_boundary, done);
+ if (r < 0) {
+ return r;
+ }
+ if (done) {
+ return 0;
+ }
+ if (reached_boundary) { // skip the first boundary
+ r = read_line(bl, chunk_size, reached_boundary, done);
+ if (r < 0) {
+ return r;
+ } else if (done) {
+ return 0;
+ }
+ }
+ while (true) {
+ /*
+ * iterate through fields
+ */
+ std::string line = rgw_trim_whitespace(string(bl.c_str(), bl.length()));
+ if (line.empty()) {
+ break;
+ }
+ struct post_part_field field;
+ string field_name;
+ r = parse_part_field(line, field_name, field);
+ if (r < 0) {
+ return r;
+ }
+ part->fields[field_name] = field;
+ if (stringcasecmp(field_name, "Content-Disposition") == 0) {
+ part->name = field.params["name"];
+ }
+ if (reached_boundary) {
+ break;
+ }
+ r = read_line(bl, chunk_size, reached_boundary, done);
+ if (r < 0) {
+ return r;
+ }
+ }
+ return 0;
+bool RGWPostObj_ObjStore::part_str(parts_collection_t& parts, // looks up part `name`; on success stores its contents, passed through rgw_trim_whitespace(), in *val
+ const std::string& name,
+ std::string* val)
+ const auto iter = parts.find(name);
+ if (std::end(parts) == iter) {
+ return false; // no such part: *val is not modified
+ }
+ ceph::bufferlist& data = iter->; // NOTE(review): the member after "->" is missing in this copy; restore it from the original source
+ std::string str = string(data.c_str(), data.length()); // length-bounded copy of the buffer
+ *val = rgw_trim_whitespace(str);
+ return true;
+std::string RGWPostObj_ObjStore::get_part_str(parts_collection_t& parts,
+ const std::string& name,
+ const std::string& def_val)
+ std::string val;
+ if (part_str(parts, name, &val)) {
+ return val;
+ } else {
+ return rgw_trim_whitespace(def_val);
+ }
+bool RGWPostObj_ObjStore::part_bl(parts_collection_t& parts, // looks up part `name`; on success assigns its buffer to *pbl
+ const std::string& name,
+ ceph::bufferlist* pbl)
+ const auto iter = parts.find(name);
+ if (std::end(parts) == iter) {
+ return false; // no such part: *pbl is not modified
+ }
+ *pbl = iter->; // NOTE(review): the member after "->" is missing in this copy; restore it from the original source
+ return true;
+int RGWPostObj_ObjStore::verify_params()
+ /* check that we have enough memory to store the object
+ note that this test isn't exact and may fail unintentionally
+ for large requests is */
+ if (!s->length) {
+ }
+ off_t len = atoll(s->length);
+ if (len > (off_t)(s->cct->_conf->rgw_max_put_size)) {
+ return -ERR_TOO_LARGE;
+ }
+ supplied_md5_b64 = s->info.env->get("HTTP_CONTENT_MD5");
+ return 0;
+// Prepares multipart/form-data parsing for a POST object upload.
+//
+// Steps, in order:
+//  - answer a pending "Expect: 100-continue" right away, because the form
+//    parameters live in the request body;
+//  - parse the CONTENT_TYPE header with parse_boundary_params() into the media
+//    type and its parameters;
+//  - reject anything that is not multipart/form-data (-EINVAL, err_msg set);
+//  - require a "boundary" parameter (-EINVAL, err_msg set) and store it,
+//    prefixed with "--", in `boundary`.
+// Returns 0 on success.
+int RGWPostObj_ObjStore::get_params()
+  if (s->expect_cont) {
+    /* OK, here it really gets ugly. With POST, the params are embedded in the
+     * request body, so we need to continue before being able to actually look
+     * at them. This diverts from the usual request flow. */
+    dump_continue(s);
+    s->expect_cont = false;
+  }
+  std::string req_content_type_str = s->info.env->get("CONTENT_TYPE", "");
+  std::string req_content_type;
+  std::map<std::string, std::string> params;
+  parse_boundary_params(req_content_type_str, req_content_type, params);
+  // The media type extracted above is what has to be multipart/form-data.
+  if (req_content_type.compare("multipart/form-data") != 0) {
+    err_msg = "Request Content-Type is not multipart/form-data";
+    return -EINVAL;
+  }
+  // Only walk the parameter map when level-20 logging is actually gathered.
+  if (s->cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+    ldout(s->cct, 20) << "request content_type_str="
+                      << req_content_type_str << dendl;
+    ldout(s->cct, 20) << "request content_type params:" << dendl;
+    for (const auto& pair : params) {
+      ldout(s->cct, 20) << " " << pair.first << " -> " << pair.second
+                        << dendl;
+    }
+  }
+  const auto iter = params.find("boundary");
+  if (std::end(params) == iter) {
+    err_msg = "Missing multipart boundary specification";
+    return -EINVAL;
+  }
+  /* Create the boundary. */
+  boundary = "--";
+  boundary.append(iter->second);
+  return 0;
+// Reads the ACL document from the request body into `data`.
+// The body is read in one go through rgw_rest_read_all_input(), capped at
+// rgw_max_put_param_size and with chunked input disallowed. Whatever was read
+// is then logged at level 0. Returns the read status, which is also kept in
+// op_ret.
+int RGWPutACLs_ObjStore::get_params()
+  std::tie(op_ret, data) =
+      rgw_rest_read_all_input(s, s->cct->_conf->rgw_max_put_param_size, false);
+  ldout(s->cct, 0) << "RGWPutACLs_ObjStore::get_params read data is: " << data.c_str() << dendl;
+  return op_ret;
+// Reads the lifecycle document from the request body into `data`, capped at
+// rgw_max_put_param_size and with chunked input disallowed.
+// Returns the read status, which is also kept in op_ret.
+int RGWPutLC_ObjStore::get_params()
+  std::tie(op_ret, data) =
+      rgw_rest_read_all_input(s, s->cct->_conf->rgw_max_put_param_size, false);
+  return op_ret;
+// Reads the object-lock configuration from the request body into `data`,
+// capped at rgw_max_put_param_size and with chunked input disallowed.
+// Returns the read status, which is also kept in op_ret.
+int RGWPutBucketObjectLock_ObjStore::get_params()
+  std::tie(op_ret, data) =
+      rgw_rest_read_all_input(s, s->cct->_conf->rgw_max_put_param_size, false);
+  return op_ret;
+// Reads the legal-hold document from the request body into `data`, capped at
+// rgw_max_put_param_size and with chunked input disallowed.
+// Returns the read status, which is also kept in op_ret.
+int RGWPutObjLegalHold_ObjStore::get_params()
+  std::tie(op_ret, data) =
+      rgw_rest_read_all_input(s, s->cct->_conf->rgw_max_put_param_size, false);
+  return op_ret;
+// Reads a request body of unknown length into a bufferlist.
+//
+// The body is pulled with recv_body() in pieces that start at READ_CHUNK bytes
+// and double after every completely filled piece until they reach
+// MAX_READ_CHUNK. A piece that comes back shorter than requested ends the
+// transfer.
+//
+// Returns a (status, body) tuple:
+//  - (0, body) on success;
+//  - (negative recv_body() result, data read so far) when a read fails;
+//  - (-ERANGE, data read so far) when the running request budget has grown
+//    past `max_read`.
+static std::tuple<int, bufferlist> read_all_chunked_input(req_state *s, const uint64_t max_read)
+#define READ_CHUNK 4096
+#define MAX_READ_CHUNK (128 * 1024)
+  int need_to_read = READ_CHUNK;
+  // Bytes requested so far, including the chunk about to be read. Kept as a
+  // 64-bit unsigned value so it is compared against max_read without
+  // truncation and cannot overflow for large limits.
+  uint64_t total = need_to_read;
+  bufferlist bl;
+  while (true) {
+    bufferptr bp(need_to_read + 1);  // one spare byte for the terminator below
+    const int read_len = recv_body(s, bp.c_str(), need_to_read);
+    if (read_len < 0) {
+      return std::make_tuple(read_len, std::move(bl));
+    }
+    bp.c_str()[read_len] = '\0';
+    bp.set_length(read_len);
+    bl.append(bp);
+    if (read_len != need_to_read) {
+      break;  // short read: nothing more to fetch
+    }
+    if (need_to_read < MAX_READ_CHUNK)
+      need_to_read *= 2;
+    if (total > max_read) {
+      return std::make_tuple(-ERANGE, std::move(bl));
+    }
+    total += need_to_read;
+  }
+  return std::make_tuple(0, std::move(bl));
+// Reads the complete request body into a bufferlist.
+//
+// s             request state; s->length is the declared content length
+//               string, or null when the client sent none
+// max_len       largest body accepted when a length was declared; also the
+//               budget handed to the chunked reader
+// allow_chunked whether a body without a declared length may be read using
+//               "Transfer-Encoding: chunked"
+//
+// Returns a (status, body) tuple. Status is 0 on success, -ERR_LENGTH_REQUIRED
+// when no length was declared and chunked input is either not allowed or not
+// announced by the client, -ERANGE when the declared length exceeds max_len,
+// or the negative result of the underlying read.
+std::tuple<int, bufferlist > rgw_rest_read_all_input(struct req_state *s,
+                                                     const uint64_t max_len,
+                                                     const bool allow_chunked)
+  bufferlist body;
+  const bool have_length = (s->length != nullptr);
+  if (!have_length && !allow_chunked) {
+    return std::make_tuple(-ERR_LENGTH_REQUIRED, std::move(body));
+  }
+  const size_t declared = have_length ? atoll(s->length) : 0;
+  if (declared != 0) {
+    // Declared, non-empty body: read it in a single piece.
+    if (declared > (size_t)max_len) {
+      return std::make_tuple(-ERANGE, std::move(body));
+    }
+    bufferptr piece(declared + 1);
+    const int got = recv_body(s, piece.c_str(), declared);
+    if (got < 0) {
+      return std::make_tuple(got, std::move(body));
+    }
+    piece.c_str()[got] = '\0';
+    piece.set_length(got);
+    body.append(piece);
+    return std::make_tuple(0, std::move(body));
+  }
+  if (have_length) {
+    // A length was declared but evaluates to zero: nothing to read.
+    return std::make_tuple(0, std::move(body));
+  }
+  // No declared length; chunked input is allowed at this point.
+  const char *transfer_encoding = s->info.env->get("HTTP_TRANSFER_ENCODING");
+  if (transfer_encoding == nullptr || strcmp(transfer_encoding, "chunked") != 0) {
+    return std::make_tuple(-ERR_LENGTH_REQUIRED, std::move(body));
+  }
+  int status = 0;
+  std::tie(status, body) = read_all_chunked_input(s, max_len);
+  if (status < 0) {
+    return std::make_tuple(status, std::move(body));
+  }
+  return std::make_tuple(0, std::move(body));
+// Collects the inputs of a complete-multipart-upload request.
+// The "uploadId" query argument is mandatory (-ENOTSUP when empty). The
+// request body is then read into `data`, limited to
+// COMPLETE_MULTIPART_MAX_LEN bytes. Returns 0 on success or the negative
+// status, which is also left in op_ret.
+int RGWCompleteMultipart_ObjStore::get_params()
+  upload_id = s->info.args.get("uploadId");
+  if (upload_id.empty()) {
+    return (op_ret = -ENOTSUP);
+  }
+#define COMPLETE_MULTIPART_MAX_LEN (1024 * 1024) /* api defines max 10,000 parts, this should be enough */
+  std::tie(op_ret, data) = rgw_rest_read_all_input(s, COMPLETE_MULTIPART_MAX_LEN);
+  return (op_ret < 0) ? op_ret : 0;
+// Collects the inputs of a list-parts request from the query string.
+//
+//  - "uploadId" is mandatory; an empty value fails with -ENOTSUP immediately,
+//    so the error can no longer be overwritten by the max-parts parsing below.
+//  - "part-number-marker", when present, must be a base-10 integer and is
+//    stored in `marker`; otherwise -EINVAL.
+//  - "max-parts" is parsed by parse_value_and_bound() with an upper bound of
+//    rgw_max_listing_results.
+// The returned status is also left in op_ret.
+int RGWListMultipart_ObjStore::get_params()
+  upload_id = s->info.args.get("uploadId");
+  if (upload_id.empty()) {
+    op_ret = -ENOTSUP;
+    return op_ret;
+  }
+  string marker_str = s->info.args.get("part-number-marker");
+  if (!marker_str.empty()) {
+    string err;
+    marker = strict_strtol(marker_str.c_str(), 10, &err);
+    if (!err.empty()) {
+      // Log the text that failed to parse, not the conversion result.
+      ldout(s->cct, 20) << "bad marker: " << marker_str << dendl;
+      op_ret = -EINVAL;
+      return op_ret;
+    }
+  }
+  string str = s->info.args.get("max-parts");
+  op_ret = parse_value_and_bound(str, max_parts, 0,
+                                 g_conf().get_val<uint64_t>("rgw_max_listing_results"),
+                                 max_parts);
+  return op_ret;
+// Collects the inputs of a list-multipart-uploads request from the query
+// string: "delimiter", "prefix", "max-uploads" (parsed with
+// parse_value_and_bound(), upper bound rgw_max_listing_results, fallback
+// default_max) and the "key-marker" / "upload-id-marker" pair. The marker is
+// only initialised when a key marker was supplied.
+// Returns 0 on success or the negative parse status, also left in op_ret.
+int RGWListBucketMultiparts_ObjStore::get_params()
+  delimiter = s->info.args.get("delimiter");
+  prefix = s->info.args.get("prefix");
+  string max_uploads_arg = s->info.args.get("max-uploads");
+  op_ret = parse_value_and_bound(max_uploads_arg, max_uploads, 0,
+                                 g_conf().get_val<uint64_t>("rgw_max_listing_results"),
+                                 default_max);
+  if (op_ret < 0) {
+    return op_ret;
+  }
+  string key_marker_arg = s->info.args.get("key-marker");
+  string upload_id_marker_arg = s->info.args.get("upload-id-marker");
+  if (!key_marker_arg.empty()) {
+    marker.init(key_marker_arg, upload_id_marker_arg);
+  }
+  return 0;
+// Collects the inputs of a multi-object delete request.
+// A request without a bucket name fails with -EINVAL. Otherwise the target
+// bucket is taken from the request state and the body is read into `data`,
+// capped at rgw_max_put_param_size and with chunked input disallowed.
+// The returned status is also left in op_ret.
+int RGWDeleteMultiObj_ObjStore::get_params()
+  if (s->bucket_name.empty()) {
+    return (op_ret = -EINVAL);
+  }
+  // everything is probably fine, set the bucket
+  bucket = s->bucket;
+  std::tie(op_ret, data) =
+      rgw_rest_read_all_input(s, s->cct->_conf->rgw_max_put_param_size, false);
+  return op_ret;
+// Finishes the response of a generic REST op.
+// When the flusher has not started yet, http_ret is recorded as the request's
+// error state, the status is dumped and the header is ended. The flusher is
+// flushed in either case.
+void RGWRESTOp::send_response()
+  const bool already_started = flusher.did_start();
+  if (!already_started) {
+    set_req_state_err(s, http_ret);
+    dump_errno(s);
+    end_header(s, this);
+  }
+  flusher.flush();
+// Authorizes the op by passing the requesting user's capability set to
+// check_caps() and returning its verdict.
+int RGWRESTOp::verify_permission()
+  auto& user_caps = s->user->caps;
+  return check_caps(user_caps);
+// Creates the op object for the request's HTTP method by dispatching to the
+// matching op_*() factory. A non-null op is initialised with the store, the
+// request state and this handler before it is returned. Methods without a
+// factory yield a null pointer, as does a factory that returns none.
+RGWOp* RGWHandler_REST::get_op(RGWRados* store)
+  RGWOp* created = nullptr;
+  switch (s->op) {
+  case OP_GET:
+    created = op_get();
+    break;
+  case OP_PUT:
+    created = op_put();
+    break;
+  case OP_DELETE:
+    created = op_delete();
+    break;
+  case OP_HEAD:
+    created = op_head();
+    break;
+  case OP_POST:
+    created = op_post();
+    break;
+  case OP_COPY:
+    created = op_copy();
+    break;
+  case OP_OPTIONS:
+    created = op_options();
+    break;
+  default:
+    return nullptr;
+  }
+  if (created != nullptr) {
+    created->init(store, s, this);
+  }
+  return created;
+} /* get_op */
+void RGWHandler_REST::put_op(RGWOp* op) // Disposes of an op object by deleting it.
+ delete op; // deleting a null pointer is a no-op, so a null `op` is tolerated
+} /* put_op */
+int RGWHandler_REST::allocate_formatter(struct req_state *s,
+ int default_type,
+ bool configurable) /*
+ * Chooses the response format for the request and (re)creates s->formatter.
+ *
+ * The format starts out as `default_type`. When `configurable` is set, the
+ * "format" query argument is consulted first; if it is none of xml/json/html,
+ * the media type at the front of the Accept header is used instead
+ * (everything before the first ';', cut off at 63 characters). The decision
+ * is handed to reallocate_formatter(), whose result is returned.
+ *
+ * NOTE(review): in this excerpt only the XML branches assign `type`; the
+ * json/html branches are empty and the query-argument comparisons have lost
+ * their left-hand side (presumably format_str) - confirm against upstream.
+ */
+ s->format = -1; // set to an invalid value so that allocation happens anyway
+ auto type = default_type;
+ if (configurable) {
+ string format_str = s->info.args.get("format");
+ if ("xml") == 0) {
+ type = RGW_FORMAT_XML;
+ } else if ("json") == 0) {
+ } else if ("html") == 0) {
+ } else {
+ const char *accept = s->info.env->get("HTTP_ACCEPT");
+ if (accept) {
+ char format_buf[64];
+ unsigned int i = 0;
+ for (; i < sizeof(format_buf) - 1 && accept[i] && accept[i] != ';'; ++i) { // copy up to the first ';' or until the buffer is full
+ format_buf[i] = accept[i];
+ }
+ format_buf[i] = 0; // terminate the copied media type
+ if ((strcmp(format_buf, "text/xml") == 0) || (strcmp(format_buf, "application/xml") == 0)) {
+ type = RGW_FORMAT_XML;
+ } else if (strcmp(format_buf, "application/json") == 0) {
+ } else if (strcmp(format_buf, "text/html") == 0) {
+ }
+ }
+ }
+ }
+ return RGWHandler_REST::reallocate_formatter(s, type);
+int RGWHandler_REST::reallocate_formatter(struct req_state *s, int type) /*
+ * Makes s->formatter match the requested format `type`.
+ *
+ * If s->format already equals `type`, the existing formatter is reset and
+ * reused. Otherwise the old formatter is deleted, s->format is updated and a
+ * new formatter is created according to the format. An unrecognised format
+ * returns -EINVAL with s->formatter left null. Returns 0 on success.
+ *
+ * NOTE(review): the case labels of the switch below are missing from this
+ * excerpt; judging by the constructors, the four bodies create the plain,
+ * XML, JSON and HTML formatters - confirm against upstream.
+ */
+ if (s->format == type) {
+ // do nothing, just reset
+ ceph_assert(s->formatter);
+ s->formatter->reset();
+ return 0;
+ }
+ delete s->formatter;
+ s->formatter = nullptr;
+ s->format = type;
+ const string& mm = s->info.args.get("multipart-manifest");
+ const bool multipart_delete = ("delete") == 0); // NOTE(review): left operand missing - presumably a comparison of `mm` with "delete"
+ const bool swift_bulkupload = s->prot_flags & RGW_REST_SWIFT &&
+ s->info.args.exists("extract-archive");
+ switch (s->format) {
+ {
+ const bool use_kv_syntax = s->info.args.exists("bulk-delete") ||
+ multipart_delete || swift_bulkupload;
+ s->formatter = new RGWFormatter_Plain(use_kv_syntax);
+ break;
+ }
+ {
+ const bool lowercase_underscore = s->info.args.exists("bulk-delete") ||
+ multipart_delete || swift_bulkupload;
+ s->formatter = new XMLFormatter(false, lowercase_underscore);
+ break;
+ }
+ s->formatter = new JSONFormatter(false);
+ break;
+ s->formatter = new HTMLFormatter(s->prot_flags & RGW_REST_WEBSITE);
+ break;
+ default:
+ return -EINVAL;
+ };
+ //s->formatter->reset(); // All formatters should reset on create already
+ return 0;
+// This function enforces Amazon's spec for bucket names.
+// (The requirements, not the recommendations.)
+int RGWHandler_REST::validate_bucket_name(const string& bucket) /*
+ * Checks, in order: an empty name is accepted because the request then does
+ * not address a bucket at all; names shorter than 3 bytes or longer than
+ * MAX_BUCKET_NAME_LEN are singled out; finally every byte is inspected for
+ * 0xff and '/'.
+ *
+ * NOTE(review): the statements that reject the too-short, too-long and
+ * bad-character cases are missing from this excerpt, so as written every
+ * path ends in `return 0` - restore the error returns from upstream.
+ */
+ int len = bucket.size();
+ if (len < 3) {
+ if (len == 0) {
+ // This request doesn't specify a bucket at all
+ return 0;
+ }
+ // Name too short
+ }
+ else if (len > MAX_BUCKET_NAME_LEN) {
+ // Name too long
+ }
+ const char *s = bucket.c_str();
+ for (int i = 0; i < len; ++i, ++s) { // byte-wise scan of the name
+ if (*(unsigned char *)s == 0xff)
+ if (*(unsigned char *)s == '/')
+ }
+ return 0;
+// "The name for a key is a sequence of Unicode characters whose UTF-8 encoding
+// is at most 1024 bytes long."
+// However, we can still have control characters and other nasties in there.
+// Just as long as they're utf-8 nasties.
+int RGWHandler_REST::validate_object_name(const string& object) /*
+ * Checks the object name's byte length against MAX_OBJ_NAME_LEN and runs
+ * check_utf8() over it.
+ *
+ * NOTE(review): the statements that reject an over-long or non-UTF-8 name
+ * are missing from this excerpt, so as written the function always returns
+ * 0 - restore the error returns from upstream.
+ */
+ int len = object.size();
+ if (len > MAX_OBJ_NAME_LEN) {
+ // Name too long
+ }
+ if (check_utf8(object.c_str(), len)) { // a non-zero result is treated as a validation failure
+ // Object names must be valid UTF-8.
+ }
+ return 0;
+static http_op op_from_method(const char *method)
+ if (!method)
+ return OP_UNKNOWN;
+ if (strcmp(method, "GET") == 0)
+ return OP_GET;
+ if (strcmp(method, "PUT") == 0)
+ return OP_PUT;
+ if (strcmp(method, "DELETE") == 0)
+ return OP_DELETE;
+ if (strcmp(method, "HEAD") == 0)
+ return OP_HEAD;
+ if (strcmp(method, "POST") == 0)
+ return OP_POST;
+ if (strcmp(method, "COPY") == 0)
+ return OP_COPY;
+ if (strcmp(method, "OPTIONS") == 0)
+ return OP_OPTIONS;
+ return OP_UNKNOWN;
+// Prepares the permission state for `op`.
+//
+// Every op except bucket creation is delegated to do_init_permissions().
+// For bucket creation, the user's IAM policies are loaded from the user
+// attributes (unless the user id is empty or the identity is a role) and
+// merged into s->iam_user_policies, the IAM environment is built and 0 is
+// returned. A failure while reading the policies is logged, not propagated.
+int RGWHandler_REST::init_permissions(RGWOp* op)
+  if (op->get_type() != RGW_OP_CREATE_BUCKET) {
+    return do_init_permissions();
+  }
+  // We don't need user policies in case of STS token returned by AssumeRole, hence the check for user type
+  const bool wants_user_policies =
+      !s->user->user_id.empty() &&
+      s->auth.identity->get_identity_type() != TYPE_ROLE;
+  if (wants_user_policies) {
+    try {
+      map<string, bufferlist> uattrs;
+      const auto ret = rgw_get_user_attrs_by_uid(store, s->user->user_id, uattrs);
+      if (!ret) {
+        auto user_policies = get_iam_user_policy_from_attr(s->cct, store, uattrs, s->user->user_id.tenant);
+        if (s->iam_user_policies.empty()) {
+          s->iam_user_policies = std::move(user_policies);
+        } else {
+          // This scenario can happen when a STS token has a policy, then we need to append other user policies
+          // to the existing ones. (e.g. token returned by GetSessionToken)
+          s->iam_user_policies.insert(s->iam_user_policies.end(), user_policies.begin(), user_policies.end());
+        }
+      }
+    } catch (const std::exception& e) {
+      lderr(s->cct) << "Error reading IAM User Policy: " << e.what() << dendl;
+    }
+  }
+  rgw_build_iam_environment(store, s);
+  return 0;
+// Decides whether only the bucket's permissions are needed for the request
+// and forwards that decision to do_read_permissions().
+//
+//  HEAD/GET      bucket and object
+//  PUT/POST/COPY bucket only for a multi-object delete ("delete" argument),
+//                bucket and object for object update ops, nothing at all
+//                (returns 0) for bucket creation, bucket only otherwise
+//  DELETE        bucket only unless the "tagging" argument is present
+//  OPTIONS       bucket only
+// Any other method fails with -EINVAL.
+int RGWHandler_REST::read_permissions(RGWOp* op_obj)
+  bool bucket_only = false;
+  switch (s->op) {
+  case OP_HEAD:
+  case OP_GET:
+    bucket_only = false;
+    break;
+  case OP_PUT:
+  case OP_POST:
+  case OP_COPY:
+    if (s->info.args.exists("delete")) {
+      /* is it a 'multi-object delete' request? */
+      bucket_only = true;
+    } else if (is_obj_update_op()) {
+      bucket_only = false;
+    } else if (op_obj->get_type() == RGW_OP_CREATE_BUCKET) {
+      /* is it a 'create bucket' request? */
+      return 0;
+    } else {
+      bucket_only = true;
+    }
+    break;
+  case OP_DELETE:
+    bucket_only = !s->info.args.exists("tagging");
+    break;
+  case OP_OPTIONS:
+    bucket_only = true;
+    break;
+  default:
+    return -EINVAL;
+  }
+  return do_read_permissions(op_obj, bucket_only);
+// Registers `mgr` as the manager for entry point "/" + `resource`.
+// A manager previously registered under the same path is deleted first.
+// Every intermediate path prefix that has no manager yet receives a default,
+// do-nothing RGWRESTMgr. All registered paths are also indexed by their
+// length in resources_by_size.
+void RGWRESTMgr::register_resource(string resource, RGWRESTMgr *mgr)
+  const string path = "/" + resource;
+  /* do we have a resource manager registered for this entry point? */
+  const auto existing = resource_mgrs.find(path);
+  if (existing != resource_mgrs.end()) {
+    delete existing->second;
+  }
+  resource_mgrs[path] = mgr;
+  resources_by_size.emplace(path.size(), path);
+  /* now build default resource managers for the path (instead of nested entry points)
+   * e.g., if the entry point is /auth/v1.0/ then we'd want to create a default
+   * manager for /auth/
+   */
+  for (size_t slash = path.find('/', 1);
+       slash != string::npos && slash != path.size() - 1;
+       slash = path.find('/', slash + 1)) {
+    const string parent = path.substr(0, slash);
+    if (resource_mgrs.find(parent) == resource_mgrs.end()) { /* only register it if one does not exist */
+      resource_mgrs[parent] = new RGWRESTMgr; /* a default do-nothing manager */
+      resources_by_size.emplace(parent.size(), parent);
+    }
+  }
+void RGWRESTMgr::register_default_mgr(RGWRESTMgr *mgr) // Installs `mgr` as the fallback manager, replacing any previous one.
+ delete default_mgr; // the previous fallback is destroyed here, so managers passed in are expected to be heap-allocated
+ default_mgr = mgr;
+RGWRESTMgr* RGWRESTMgr::get_resource_mgr(struct req_state* const s,
+ const std::string& uri,
+ std::string* const out_uri) /*
+ * Picks the manager responsible for `uri`.
+ *
+ * `*out_uri` is first set to `uri`. Registered resources are then walked
+ * from the longest path to the shortest (reverse iteration over
+ * resources_by_size). The first resource that matches and ends at a path
+ * segment boundary (the uri ends there or continues with '/') has the rest
+ * of the uri delegated to its manager recursively. Without a match the
+ * default manager, if one is registered, is asked; otherwise this manager
+ * is returned.
+ */
+ *out_uri = uri;
+ multimap<size_t, string>::reverse_iterator iter;
+ for (iter = resources_by_size.rbegin(); iter != resources_by_size.rend(); ++iter) {
+ string& resource = iter->second;
+ if (, iter->first, resource) == 0 && // NOTE(review): the call before the first comma is missing in this excerpt - presumably a prefix comparison of `uri` against `resource`; confirm against upstream
+ (uri.size() == iter->first ||
+ uri[iter->first] == '/')) {
+ std::string suffix = uri.substr(iter->first); // what remains after the matched resource path
+ return resource_mgrs[resource]->get_resource_mgr(s, suffix, out_uri);
+ }
+ }
+ if (default_mgr) {
+ return default_mgr->get_resource_mgr_as_default(s, uri, out_uri);
+ }
+ return this;
+// Splits `s_headers` into individual header names with get_str_vec() and adds
+// the upper-cased form of each one to x_headers.
+void RGWREST::register_x_headers(const string& s_headers)
+  const std::vector<std::string> header_names = get_str_vec(s_headers);
+  for (const auto& header_name : header_names) {
+    (void) x_headers.insert(boost::algorithm::to_upper_copy(header_name)); // XXX
+  }
+ map<string, RGWRESTMgr *>::iterator iter; // NOTE(review): these lines read like a separate cleanup routine (presumably the RGWRESTMgr destructor) whose signature is missing from this excerpt - confirm against upstream
+ for (iter = resource_mgrs.begin(); iter != resource_mgrs.end(); ++iter) {
+ delete iter->second; // destroy every registered sub-manager
+ }
+ delete default_mgr; // and the fallback manager
+// Converts a content-length header value to a number.
+// An empty string counts as 0. Anything strict_strtoll() reports an error
+// for yields -1. `content_length` must not be null.
+int64_t parse_content_length(const char *content_length)
+  if (*content_length == '\0') {
+    return 0;
+  }
+  string parse_error;
+  const int64_t parsed = strict_strtoll(content_length, 10, &parse_error);
+  return parse_error.empty() ? parsed : -1;
+int RGWREST::preprocess(struct req_state *s, rgw::io::BasicClient* cio) /*
+ * Normalises an incoming request before a handler is chosen.
+ *
+ * In order, the visible code:
+ *  - keeps an untouched copy of the request URI for AWS4 signing and
+ *    attaches the client I/O object to the request state;
+ *  - derives priorities for the "s3" and "s3website" APIs from their
+ *    position in rgw_enable_apis;
+ *  - inside a conditional whose test is missing from this excerpt, matches
+ *    the host against the configured hostname sets, optionally resolves a
+ *    CNAME, and for virtual-hosted requests prepends the bucket subdomain
+ *    to request_uri;
+ *  - URL-decodes the URI and rejects embedded NUL bytes;
+ *  - selects and validates the content length from CONTENT_LENGTH and
+ *    HTTP_CONTENT_LENGTH;
+ *  - copies the generic attributes found in the environment, records
+ *    "Expect: 100-continue", maps the method to s->op and initialises the
+ *    meta info.
+ *
+ * Returns 0 on success, -ERR_ZERO_IN_URL for a NUL inside the decoded URI,
+ * or -EINVAL for an unparsable or negative content length.
+ *
+ * NOTE(review): many expressions in the host-handling section have lost an
+ * operand (presumably the request's host string) - restore them from
+ * upstream before relying on this block.
+ */
+ req_info& info = s->info;
+ /* save the request uri used to hash on the client side. request_uri may suffer
+ modifications as part of the bucket encoding in the subdomain calling format.
+ request_uri_aws4 will be used under aws4 auth */
+ s->info.request_uri_aws4 = s->info.request_uri;
+ s->cio = cio;
+ // We need to know if this RGW instance is running the s3website API with a
+ // higher priority than regular S3 API, or possibly in place of the regular
+ // S3 API.
+ // Map the listing of rgw_enable_apis in REVERSE order, so that items near
+ // the front of the list have a higher number assigned (and -1 for items not in the list).
+ list<string> apis;
+ get_str_list(g_conf()->rgw_enable_apis, apis);
+ int api_priority_s3 = -1;
+ int api_priority_s3website = -1;
+ auto api_s3website_priority_rawpos = std::find(apis.begin(), apis.end(), "s3website");
+ auto api_s3_priority_rawpos = std::find(apis.begin(), apis.end(), "s3");
+ if (api_s3_priority_rawpos != apis.end()) {
+ api_priority_s3 = apis.size() - std::distance(apis.begin(), api_s3_priority_rawpos); // list size minus position: earlier entries score higher
+ }
+ if (api_s3website_priority_rawpos != apis.end()) {
+ api_priority_s3website = apis.size() - std::distance(apis.begin(), api_s3website_priority_rawpos);
+ }
+ ldout(s->cct, 10) << "rgw api priority: s3=" << api_priority_s3 << " s3website=" << api_priority_s3website << dendl;
+ bool s3website_enabled = api_priority_s3website >= 0;
+ if ( { // NOTE(review): condition missing in this excerpt - presumably a check that a host value is present
+ ssize_t pos;
+ if ('[') == 0) { // NOTE(review): truncated - the '[' and ']' literals suggest bracketed IPv6 host handling; confirm
+ pos =']');
+ if (pos >=1) {
+ =, pos-1);
+ }
+ } else {
+ pos =':'); // NOTE(review): truncated - the ':' literal suggests stripping a port suffix; confirm
+ if (pos >= 0) {
+ =, pos);
+ }
+ }
+ ldout(s->cct, 10) << "host=" << << dendl;
+ string domain;
+ string subdomain;
+ bool in_hosted_domain_s3website = false;
+ bool in_hosted_domain = rgw_find_host_in_domains(, &domain, &subdomain, hostnames_set);
+ string s3website_domain;
+ string s3website_subdomain;
+ if (s3website_enabled) {
+ in_hosted_domain_s3website = rgw_find_host_in_domains(, &s3website_domain, &s3website_subdomain, hostnames_s3website_set);
+ if (in_hosted_domain_s3website) {
+ in_hosted_domain = true; // TODO: should hostnames be a strict superset of hostnames_s3website?
+ domain = s3website_domain;
+ subdomain = s3website_subdomain;
+ }
+ }
+ ldout(s->cct, 20)
+ << "subdomain=" << subdomain
+ << " domain=" << domain
+ << " in_hosted_domain=" << in_hosted_domain
+ << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
+ << dendl;
+ if (g_conf()->rgw_resolve_cname
+ && !in_hosted_domain
+ && !in_hosted_domain_s3website) { // only fall back to DNS when neither hostname set matched
+ string cname;
+ bool found;
+ int r = rgw_resolver->resolve_cname(, cname, &found);
+ if (r < 0) {
+ ldout(s->cct, 0)
+ << "WARNING: rgw_resolver->resolve_cname() returned r=" << r
+ << dendl;
+ }
+ if (found) {
+ ldout(s->cct, 5) << "resolved host cname " << << " -> "
+ << cname << dendl;
+ in_hosted_domain =
+ rgw_find_host_in_domains(cname, &domain, &subdomain, hostnames_set);
+ if (s3website_enabled
+ && !in_hosted_domain_s3website) {
+ in_hosted_domain_s3website =
+ rgw_find_host_in_domains(cname, &s3website_domain,
+ &s3website_subdomain,
+ hostnames_s3website_set);
+ if (in_hosted_domain_s3website) {
+ in_hosted_domain = true; // TODO: should hostnames be a
+ // strict superset of hostnames_s3website?
+ domain = s3website_domain;
+ subdomain = s3website_subdomain;
+ }
+ }
+ ldout(s->cct, 20)
+ << "subdomain=" << subdomain
+ << " domain=" << domain
+ << " in_hosted_domain=" << in_hosted_domain
+ << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
+ << dendl;
+ }
+ }
+ // Handle A/CNAME records that point to the RGW storage, but do match the
+ // CNAME test above, per issue
+ // If BOTH domain & subdomain variables are empty, then none of the above
+ // cases matched anything, and we should fall back to using the Host header
+ // directly as the bucket name.
+ // As additional checks:
+ // - if the Host header is an IP, we're using path-style access without DNS
+ // - Also check that the Host header is a valid bucket name before using it.
+ // - Don't enable virtual hosting if no hostnames are configured
+ if (subdomain.empty()
+ && (domain.empty() || domain !=
+ && !looks_like_ip_address(
+ && RGWHandler_REST::validate_bucket_name( == 0
+ && !(hostnames_set.empty() && hostnames_s3website_set.empty())) {
+ subdomain.append(;
+ in_hosted_domain = 1;
+ }
+ if (s3website_enabled && api_priority_s3website > api_priority_s3) {
+ in_hosted_domain_s3website = 1; // s3website outranks s3, so the request is treated as a website request
+ }
+ if (in_hosted_domain_s3website) {
+ s->prot_flags |= RGW_REST_WEBSITE;
+ }
+ if (in_hosted_domain && !subdomain.empty()) { // virtual-hosted style: rewrite the URI to "/<subdomain>/<original uri>"
+ string encoded_bucket = "/";
+ encoded_bucket.append(subdomain);
+ if (s->info.request_uri[0] != '/')
+ encoded_bucket.append("/");
+ encoded_bucket.append(s->info.request_uri);
+ s->info.request_uri = encoded_bucket;
+ }
+ if (!domain.empty()) {
+ s->info.domain = domain;
+ }
+ ldout(s->cct, 20)
+ << "final domain/bucket"
+ << " subdomain=" << subdomain
+ << " domain=" << domain
+ << " in_hosted_domain=" << in_hosted_domain
+ << " in_hosted_domain_s3website=" << in_hosted_domain_s3website
+ << " s->info.domain=" << s->info.domain
+ << " s->info.request_uri=" << s->info.request_uri
+ << dendl;
+ }
+ if (s->info.domain.empty()) {
+ s->info.domain = s->cct->_conf->rgw_dns_name; // fall back to the configured DNS name
+ }
+ s->decoded_uri = url_decode(s->info.request_uri);
+ /* Validate for being free of the '\0' buried in the middle of the string. */
+ if (std::strlen(s->decoded_uri.c_str()) != s->decoded_uri.length()) {
+ return -ERR_ZERO_IN_URL;
+ }
+ /* FastCGI specification, section 6.3
+ *
+ * ===
+ * The Authorizer application receives HTTP request information from the Web
+ * server on the FCGI_PARAMS stream, in the same format as a Responder. The
+ * Web server does not send CONTENT_LENGTH, PATH_INFO, PATH_TRANSLATED, and
+ * SCRIPT_NAME headers.
+ * ===
+ * Ergo if we are in Authorizer role, we MUST look at HTTP_CONTENT_LENGTH
+ * instead of CONTENT_LENGTH for the Content-Length.
+ *
+ * There is one slight wrinkle in this, and that's older versions of
+ * nginx/lighttpd/apache setting BOTH headers. As a result, we have to check
+ * both headers and can't always simply pick A or B.
+ */
+ const char* content_length = info.env->get("CONTENT_LENGTH");
+ const char* http_content_length = info.env->get("HTTP_CONTENT_LENGTH");
+ if (!http_content_length != !content_length) { // true when exactly one of the two is set
+ /* Easy case: one or the other is missing */
+ s->length = (content_length ? content_length : http_content_length);
+ } else if (s->cct->_conf->rgw_content_length_compat &&
+ content_length && http_content_length) {
+ /* Hard case: Both are set, we have to disambiguate */
+ int64_t content_length_i, http_content_length_i;
+ content_length_i = parse_content_length(content_length);
+ http_content_length_i = parse_content_length(http_content_length);
+ // Now check them:
+ if (http_content_length_i < 0) {
+ // HTTP_CONTENT_LENGTH is invalid, ignore it
+ } else if (content_length_i < 0) {
+ // CONTENT_LENGTH is invalid, and HTTP_CONTENT_LENGTH is valid
+ // Swap entries
+ content_length = http_content_length;
+ } else {
+ // Let's pick the larger size
+ if (content_length_i < http_content_length_i) {
+ // prefer the larger value
+ content_length = http_content_length;
+ }
+ }
+ s->length = content_length;
+ // End of: else if (s->cct->_conf->rgw_content_length_compat &&
+ // content_length &&
+ // http_content_length)
+ } else {
+ /* no content length was defined */
+ s->length = NULL; // also reached when both headers are set but rgw_content_length_compat is off
+ }
+ if (s->length) {
+ if (*s->length == '\0') {
+ s->content_length = 0;
+ } else {
+ string err;
+ s->content_length = strict_strtoll(s->length, 10, &err);
+ if (!err.empty()) {
+ ldout(s->cct, 10) << "bad content length, aborting" << dendl;
+ return -EINVAL;
+ }
+ }
+ }
+ if (s->content_length < 0) {
+ ldout(s->cct, 10) << "negative content length, aborting" << dendl;
+ return -EINVAL;
+ }
+ map<string, string>::iterator giter;
+ for (giter = generic_attrs_map.begin(); giter != generic_attrs_map.end();
+ ++giter) {
+ const char *env = info.env->get(giter->first.c_str()); // map key is the environment variable, map value the attribute name
+ if (env) {
+ s->generic_attrs[giter->second] = env;
+ }
+ }
+ if (g_conf()->rgw_print_continue) {
+ const char *expect = info.env->get("HTTP_EXPECT");
+ s->expect_cont = (expect && !strcasecmp(expect, "100-continue"));
+ }
+ s->op = op_from_method(info.method);
+ info.init_meta_info(&s->has_bad_meta);
+ return 0;
+// Resolves and initialises the handler for a request.
+//
+// The request is preprocessed, the responsible manager is looked up for the
+// decoded URI (also reported through `pmgr` when that is non-null), and the
+// manager's handler is initialised with the store, request state and I/O
+// object.
+//
+// Returns the ready handler, or a null pointer on failure. `*init_error`
+// always receives the outcome: the preprocess() or init() result, or
+// -ERR_METHOD_NOT_ALLOWED when no manager or handler is available. A handler
+// whose init() fails is handed back to its manager before returning.
+RGWHandler_REST* RGWREST::get_handler(
+  RGWRados * const store,
+  struct req_state* const s,
+  const rgw::auth::StrategyRegistry& auth_registry,
+  const std::string& frontend_prefix,
+  RGWRestfulIO* const rio,
+  RGWRESTMgr** const pmgr,
+  int* const init_error
+) {
+  const int preprocess_ret = preprocess(s, rio);
+  *init_error = preprocess_ret;
+  if (preprocess_ret < 0) {
+    return nullptr;
+  }
+  RGWRESTMgr* const manager = mgr.get_manager(s, frontend_prefix, s->decoded_uri,
+                                              &s->relative_uri);
+  if (manager == nullptr) {
+    *init_error = -ERR_METHOD_NOT_ALLOWED;
+    return nullptr;
+  }
+  if (pmgr != nullptr) {
+    *pmgr = manager;
+  }
+  RGWHandler_REST* const handler =
+      manager->get_handler(s, auth_registry, frontend_prefix);
+  if (handler == nullptr) {
+    *init_error = -ERR_METHOD_NOT_ALLOWED;
+    return nullptr;
+  }
+  const int init_ret = handler->init(store, s, rio);
+  *init_error = init_ret;
+  if (init_ret < 0) {
+    manager->put_handler(handler);
+    return nullptr;
+  }
+  return handler;
+} /* get stream handler */